Merge branch 'main' into issue-3443-require_approval
4
.github/actions/setup-runner/action.yml
vendored
|
@ -29,8 +29,8 @@ runs:
|
|||
|
||||
# Install llama-stack-client-python based on the client-version input
|
||||
if [ "${{ inputs.client-version }}" = "latest" ]; then
|
||||
echo "Installing latest llama-stack-client-python from next branch"
|
||||
uv pip install git+https://github.com/llamastack/llama-stack-client-python.git@next
|
||||
echo "Installing latest llama-stack-client-python from main branch"
|
||||
uv pip install git+https://github.com/llamastack/llama-stack-client-python.git@main
|
||||
elif [ "${{ inputs.client-version }}" = "published" ]; then
|
||||
echo "Installing published llama-stack-client-python from PyPI"
|
||||
uv pip install llama-stack-client
|
||||
|
|
|
@ -44,8 +44,8 @@ runs:
|
|||
run: |
|
||||
# Install llama-stack-client-python based on the client-version input
|
||||
if [ "${{ inputs.client-version }}" = "latest" ]; then
|
||||
echo "Installing latest llama-stack-client-python from next branch"
|
||||
export LLAMA_STACK_CLIENT_DIR=git+https://github.com/llamastack/llama-stack-client-python.git@next
|
||||
echo "Installing latest llama-stack-client-python from main branch"
|
||||
export LLAMA_STACK_CLIENT_DIR=git+https://github.com/llamastack/llama-stack-client-python.git@main
|
||||
elif [ "${{ inputs.client-version }}" = "published" ]; then
|
||||
echo "Installing published llama-stack-client-python from PyPI"
|
||||
unset LLAMA_STACK_CLIENT_DIR
|
||||
|
|
4
.github/workflows/conformance.yml
vendored
|
@ -66,6 +66,4 @@ jobs:
|
|||
# This step will fail if incompatible changes are detected, preventing breaking changes from being merged
|
||||
- name: Run OpenAPI Breaking Change Diff
|
||||
run: |
|
||||
oasdiff breaking --fail-on ERR base/docs/static/llama-stack-spec.yaml docs/static/llama-stack-spec.yaml --match-path '^/v1/openai/v1' \
|
||||
--match-path '^/v1/vector-io' \
|
||||
--match-path '^/v1/vector-dbs'
|
||||
oasdiff breaking --fail-on ERR base/docs/static/llama-stack-spec.yaml docs/static/llama-stack-spec.yaml --match-path '^/v1/'
|
||||
|
|
|
@ -7,7 +7,7 @@ sidebar_position: 1
|
|||
|
||||
### Server path
|
||||
|
||||
Llama Stack exposes an OpenAI-compatible API endpoint at `/v1/openai/v1`. So, for a Llama Stack server running locally on port `8321`, the full url to the OpenAI-compatible API endpoint is `http://localhost:8321/v1/openai/v1`.
|
||||
Llama Stack exposes OpenAI-compatible API endpoints at `/v1`. So, for a Llama Stack server running locally on port `8321`, the full url to the OpenAI-compatible API endpoint is `http://localhost:8321/v1`.
|
||||
|
||||
### Clients
|
||||
|
||||
|
@ -25,12 +25,12 @@ client = LlamaStackClient(base_url="http://localhost:8321")
|
|||
|
||||
#### OpenAI Client
|
||||
|
||||
When using an OpenAI client, set the `base_url` to the `/v1/openai/v1` path on your Llama Stack server.
|
||||
When using an OpenAI client, set the `base_url` to the `/v1` path on your Llama Stack server.
|
||||
|
||||
```python
|
||||
from openai import OpenAI
|
||||
|
||||
client = OpenAI(base_url="http://localhost:8321/v1/openai/v1", api_key="none")
|
||||
client = OpenAI(base_url="http://localhost:8321/v1", api_key="none")
|
||||
```
|
||||
|
||||
Regardless of the client you choose, the following code examples should all work the same.
|
||||
|
|
|
@ -261,7 +261,7 @@ You can even run `llama model prompt-format` see all of the templates and their
|
|||
```
|
||||
llama model prompt-format -m Llama3.2-3B-Instruct
|
||||
```
|
||||

|
||||

|
||||
|
||||
|
||||
You will be shown a Markdown formatted description of the model interface and how prompts / messages are formatted for various scenarios.
|
||||
|
|
|
@ -217,7 +217,6 @@ from llama_stack_client.types import (
|
|||
Methods:
|
||||
|
||||
- <code title="post /v1/inference/chat-completion">client.inference.<a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/resources/inference.py">chat_completion</a>(\*\*<a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/types/inference_chat_completion_params.py">params</a>) -> <a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/types/inference_chat_completion_response.py">InferenceChatCompletionResponse</a></code>
|
||||
- <code title="post /v1/inference/completion">client.inference.<a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/resources/inference.py">completion</a>(\*\*<a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/types/inference_completion_params.py">params</a>) -> <a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/types/inference_completion_response.py">InferenceCompletionResponse</a></code>
|
||||
- <code title="post /v1/inference/embeddings">client.inference.<a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/resources/inference.py">embeddings</a>(\*\*<a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/types/inference_embeddings_params.py">params</a>) -> <a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/types/embeddings_response.py">EmbeddingsResponse</a></code>
|
||||
|
||||
## VectorIo
|
||||
|
|
|
@ -824,16 +824,10 @@
|
|||
"\n",
|
||||
"\n",
|
||||
"user_input = \"Michael Jordan was born in 1963. He played basketball for the Chicago Bulls. He retired in 2003. Extract this information into JSON for me. \"\n",
|
||||
"response = client.inference.completion(\n",
|
||||
" model_id=\"meta-llama/Llama-3.1-8B-Instruct\",\n",
|
||||
" content=user_input,\n",
|
||||
" stream=False,\n",
|
||||
" sampling_params={\n",
|
||||
" \"strategy\": {\n",
|
||||
" \"type\": \"greedy\",\n",
|
||||
" },\n",
|
||||
" \"max_tokens\": 50,\n",
|
||||
" },\n",
|
||||
"response = client.chat.completions.create(\n",
|
||||
" model=\"meta-llama/Llama-3.1-8B-Instruct\",\n",
|
||||
" messages=[{\"role\": \"user\", \"content\": user_input}],\n",
|
||||
" max_tokens=50,\n",
|
||||
" response_format={\n",
|
||||
" \"type\": \"json_schema\",\n",
|
||||
" \"json_schema\": Output.model_json_schema(),\n",
|
||||
|
@ -1013,7 +1007,7 @@
|
|||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"<img src=\"https://github.com/meta-llama/llama-stack/blob/main/docs/resources/agentic-system.png?raw=true\" alt=\"drawing\" width=\"800\"/>\n",
|
||||
"<img src=\"https://github.com/meta-llama/llama-stack/blob/main/docs/static/img/agentic-system.png?raw=true\" alt=\"drawing\" width=\"800\"/>\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"Agents are characterized by having access to\n",
|
||||
|
|
|
@ -706,20 +706,15 @@
|
|||
" provider_id=\"nvidia\",\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"response = client.inference.completion(\n",
|
||||
" content=\"Complete the sentence using one word: Roses are red, violets are \",\n",
|
||||
"response = client.completions.create(\n",
|
||||
" prompt=\"Complete the sentence using one word: Roses are red, violets are \",\n",
|
||||
" stream=False,\n",
|
||||
" model_id=CUSTOMIZED_MODEL_DIR,\n",
|
||||
" sampling_params={\n",
|
||||
" \"strategy\": {\n",
|
||||
" \"type\": \"top_p\",\n",
|
||||
" \"temperature\": 0.7,\n",
|
||||
" \"top_p\": 0.9\n",
|
||||
" },\n",
|
||||
" \"max_tokens\": 20,\n",
|
||||
" },\n",
|
||||
" model=CUSTOMIZED_MODEL_DIR,\n",
|
||||
" temperature=0.7,\n",
|
||||
" top_p=0.9,\n",
|
||||
" max_tokens=20,\n",
|
||||
")\n",
|
||||
"print(f\"Inference response: {response.content}\")"
|
||||
"print(f\"Inference response: {response.choices[0].text}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -1233,20 +1228,15 @@
|
|||
" provider_id=\"nvidia\",\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"response = client.inference.completion(\n",
|
||||
" content=\"Complete the sentence using one word: Roses are red, violets are \",\n",
|
||||
"response = client.completions.create(\n",
|
||||
" prompt=\"Complete the sentence using one word: Roses are red, violets are \",\n",
|
||||
" stream=False,\n",
|
||||
" model_id=customized_chat_model_dir,\n",
|
||||
" sampling_params={\n",
|
||||
" \"strategy\": {\n",
|
||||
" \"type\": \"top_p\",\n",
|
||||
" \"temperature\": 0.7,\n",
|
||||
" \"top_p\": 0.9\n",
|
||||
" },\n",
|
||||
" \"max_tokens\": 20,\n",
|
||||
" },\n",
|
||||
" model=customized_chat_model_dir,\n",
|
||||
" temperature=0.7,\n",
|
||||
" top_p=0.9,\n",
|
||||
" max_tokens=20,\n",
|
||||
")\n",
|
||||
"print(f\"Inference response: {response.content}\")"
|
||||
"print(f\"Inference response: {response.choices[0].text}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
|
|
@ -5,6 +5,7 @@
|
|||
# the root directory of this source tree.
|
||||
|
||||
import hashlib
|
||||
import inspect
|
||||
import ipaddress
|
||||
import types
|
||||
import typing
|
||||
|
@ -12,6 +13,7 @@ from dataclasses import make_dataclass
|
|||
from typing import Annotated, Any, Dict, get_args, get_origin, Set, Union
|
||||
|
||||
from fastapi import UploadFile
|
||||
from pydantic import BaseModel
|
||||
|
||||
from llama_stack.apis.datatypes import Error
|
||||
from llama_stack.strong_typing.core import JsonType
|
||||
|
@ -632,14 +634,22 @@ class Generator:
|
|||
base_type = get_args(param_type)[0]
|
||||
else:
|
||||
base_type = param_type
|
||||
|
||||
# Check if the type is optional
|
||||
is_optional = is_type_optional(base_type)
|
||||
if is_optional:
|
||||
base_type = unwrap_optional_type(base_type)
|
||||
|
||||
if base_type is UploadFile:
|
||||
# File upload
|
||||
properties[name] = {"type": "string", "format": "binary"}
|
||||
else:
|
||||
# Form field
|
||||
# All other types - generate schema reference
|
||||
# This includes enums, BaseModels, and simple types
|
||||
properties[name] = self.schema_builder.classdef_to_ref(base_type)
|
||||
|
||||
required_fields.append(name)
|
||||
if not is_optional:
|
||||
required_fields.append(name)
|
||||
|
||||
multipart_schema = {
|
||||
"type": "object",
|
||||
|
|
Before Width: | Height: | Size: 128 KiB After Width: | Height: | Size: 128 KiB |
Before Width: | Height: | Size: 220 KiB After Width: | Height: | Size: 220 KiB |
Before Width: | Height: | Size: 71 KiB After Width: | Height: | Size: 71 KiB |
Before Width: | Height: | Size: 17 KiB After Width: | Height: | Size: 17 KiB |
Before Width: | Height: | Size: 170 KiB After Width: | Height: | Size: 170 KiB |
905
docs/static/llama-stack-spec.html
vendored
564
docs/static/llama-stack-spec.yaml
vendored
|
@ -286,7 +286,7 @@ paths:
|
|||
schema:
|
||||
$ref: '#/components/schemas/CreateAgentTurnRequest'
|
||||
required: true
|
||||
/v1/openai/v1/responses:
|
||||
/v1/responses:
|
||||
get:
|
||||
responses:
|
||||
'200':
|
||||
|
@ -558,7 +558,7 @@ paths:
|
|||
required: true
|
||||
schema:
|
||||
type: string
|
||||
/v1/openai/v1/responses/{response_id}:
|
||||
/v1/responses/{response_id}:
|
||||
get:
|
||||
responses:
|
||||
'200':
|
||||
|
@ -720,41 +720,6 @@ paths:
|
|||
required: true
|
||||
schema:
|
||||
type: string
|
||||
/v1/inference/embeddings:
|
||||
post:
|
||||
responses:
|
||||
'200':
|
||||
description: >-
|
||||
An array of embeddings, one for each content. Each embedding is a list
|
||||
of floats. The dimensionality of the embedding is model-specific; you
|
||||
can check model metadata using /models/{model_id}.
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/EmbeddingsResponse'
|
||||
'400':
|
||||
$ref: '#/components/responses/BadRequest400'
|
||||
'429':
|
||||
$ref: >-
|
||||
#/components/responses/TooManyRequests429
|
||||
'500':
|
||||
$ref: >-
|
||||
#/components/responses/InternalServerError500
|
||||
default:
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Inference
|
||||
summary: >-
|
||||
Generate embeddings for content pieces using the specified model.
|
||||
description: >-
|
||||
Generate embeddings for content pieces using the specified model.
|
||||
parameters: []
|
||||
requestBody:
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/EmbeddingsRequest'
|
||||
required: true
|
||||
/v1alpha/eval/benchmarks/{benchmark_id}/evaluations:
|
||||
post:
|
||||
responses:
|
||||
|
@ -1033,7 +998,7 @@ paths:
|
|||
required: true
|
||||
schema:
|
||||
type: string
|
||||
/v1/openai/v1/chat/completions/{completion_id}:
|
||||
/v1/chat/completions/{completion_id}:
|
||||
get:
|
||||
responses:
|
||||
'200':
|
||||
|
@ -2259,7 +2224,7 @@ paths:
|
|||
schema:
|
||||
$ref: '#/components/schemas/RegisterBenchmarkRequest'
|
||||
required: true
|
||||
/v1/openai/v1/chat/completions:
|
||||
/v1/chat/completions:
|
||||
get:
|
||||
responses:
|
||||
'200':
|
||||
|
@ -2452,7 +2417,7 @@ paths:
|
|||
schema:
|
||||
$ref: '#/components/schemas/RegisterModelRequest'
|
||||
required: true
|
||||
/v1/openai/v1/responses/{response_id}/input_items:
|
||||
/v1/responses/{response_id}/input_items:
|
||||
get:
|
||||
responses:
|
||||
'200':
|
||||
|
@ -2906,7 +2871,7 @@ paths:
|
|||
schema:
|
||||
$ref: '#/components/schemas/LogEventRequest'
|
||||
required: true
|
||||
/v1/openai/v1/vector_stores/{vector_store_id}/files:
|
||||
/v1/vector_stores/{vector_store_id}/files:
|
||||
get:
|
||||
responses:
|
||||
'200':
|
||||
|
@ -3015,7 +2980,7 @@ paths:
|
|||
schema:
|
||||
$ref: '#/components/schemas/OpenaiAttachFileToVectorStoreRequest'
|
||||
required: true
|
||||
/v1/openai/v1/completions:
|
||||
/v1/completions:
|
||||
post:
|
||||
responses:
|
||||
'200':
|
||||
|
@ -3049,7 +3014,7 @@ paths:
|
|||
schema:
|
||||
$ref: '#/components/schemas/OpenaiCompletionRequest'
|
||||
required: true
|
||||
/v1/openai/v1/vector_stores:
|
||||
/v1/vector_stores:
|
||||
get:
|
||||
responses:
|
||||
'200':
|
||||
|
@ -3136,7 +3101,7 @@ paths:
|
|||
schema:
|
||||
$ref: '#/components/schemas/OpenaiCreateVectorStoreRequest'
|
||||
required: true
|
||||
/v1/openai/v1/files/{file_id}:
|
||||
/v1/files/{file_id}:
|
||||
get:
|
||||
responses:
|
||||
'200':
|
||||
|
@ -3201,7 +3166,7 @@ paths:
|
|||
required: true
|
||||
schema:
|
||||
type: string
|
||||
/v1/openai/v1/vector_stores/{vector_store_id}:
|
||||
/v1/vector_stores/{vector_store_id}:
|
||||
get:
|
||||
responses:
|
||||
'200':
|
||||
|
@ -3298,7 +3263,7 @@ paths:
|
|||
required: true
|
||||
schema:
|
||||
type: string
|
||||
/v1/openai/v1/vector_stores/{vector_store_id}/files/{file_id}:
|
||||
/v1/vector_stores/{vector_store_id}/files/{file_id}:
|
||||
get:
|
||||
responses:
|
||||
'200':
|
||||
|
@ -3416,7 +3381,7 @@ paths:
|
|||
required: true
|
||||
schema:
|
||||
type: string
|
||||
/v1/openai/v1/embeddings:
|
||||
/v1/embeddings:
|
||||
post:
|
||||
responses:
|
||||
'200':
|
||||
|
@ -3451,7 +3416,7 @@ paths:
|
|||
schema:
|
||||
$ref: '#/components/schemas/OpenaiEmbeddingsRequest'
|
||||
required: true
|
||||
/v1/openai/v1/files:
|
||||
/v1/files:
|
||||
get:
|
||||
responses:
|
||||
'200':
|
||||
|
@ -3544,8 +3509,6 @@ paths:
|
|||
- purpose: The intended purpose of the uploaded file.
|
||||
|
||||
- expires_after: Optional form values describing expiration for the file.
|
||||
Expected expires_after[anchor] = "created_at", expires_after[seconds] = {integer}.
|
||||
Seconds must be between 3600 and 2592000 (1 hour to 30 days).
|
||||
parameters: []
|
||||
requestBody:
|
||||
content:
|
||||
|
@ -3558,45 +3521,13 @@ paths:
|
|||
format: binary
|
||||
purpose:
|
||||
$ref: '#/components/schemas/OpenAIFilePurpose'
|
||||
expires_after_anchor:
|
||||
oneOf:
|
||||
- type: string
|
||||
- type: 'null'
|
||||
expires_after_seconds:
|
||||
oneOf:
|
||||
- type: integer
|
||||
- type: 'null'
|
||||
expires_after:
|
||||
$ref: '#/components/schemas/ExpiresAfter'
|
||||
required:
|
||||
- file
|
||||
- purpose
|
||||
- expires_after_anchor
|
||||
- expires_after_seconds
|
||||
required: true
|
||||
/v1/openai/v1/models:
|
||||
get:
|
||||
responses:
|
||||
'200':
|
||||
description: A OpenAIListModelsResponse.
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/OpenAIListModelsResponse'
|
||||
'400':
|
||||
$ref: '#/components/responses/BadRequest400'
|
||||
'429':
|
||||
$ref: >-
|
||||
#/components/responses/TooManyRequests429
|
||||
'500':
|
||||
$ref: >-
|
||||
#/components/responses/InternalServerError500
|
||||
default:
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Models
|
||||
summary: List models using the OpenAI API.
|
||||
description: List models using the OpenAI API.
|
||||
parameters: []
|
||||
/v1/openai/v1/files/{file_id}/content:
|
||||
/v1/files/{file_id}/content:
|
||||
get:
|
||||
responses:
|
||||
'200':
|
||||
|
@ -3630,7 +3561,7 @@ paths:
|
|||
required: true
|
||||
schema:
|
||||
type: string
|
||||
/v1/openai/v1/vector_stores/{vector_store_id}/files/{file_id}/content:
|
||||
/v1/vector_stores/{vector_store_id}/files/{file_id}/content:
|
||||
get:
|
||||
responses:
|
||||
'200':
|
||||
|
@ -3670,7 +3601,7 @@ paths:
|
|||
required: true
|
||||
schema:
|
||||
type: string
|
||||
/v1/openai/v1/vector_stores/{vector_store_id}/search:
|
||||
/v1/vector_stores/{vector_store_id}/search:
|
||||
post:
|
||||
responses:
|
||||
'200':
|
||||
|
@ -3930,7 +3861,7 @@ paths:
|
|||
schema:
|
||||
$ref: '#/components/schemas/QueryTracesRequest'
|
||||
required: true
|
||||
/v1/inference/rerank:
|
||||
/v1alpha/inference/rerank:
|
||||
post:
|
||||
responses:
|
||||
'200':
|
||||
|
@ -4098,7 +4029,7 @@ paths:
|
|||
schema:
|
||||
$ref: '#/components/schemas/RunEvalRequest'
|
||||
required: true
|
||||
/v1/openai/v1/moderations:
|
||||
/v1/moderations:
|
||||
post:
|
||||
responses:
|
||||
'200':
|
||||
|
@ -4656,7 +4587,16 @@ components:
|
|||
type: object
|
||||
properties:
|
||||
strategy:
|
||||
$ref: '#/components/schemas/SamplingStrategy'
|
||||
oneOf:
|
||||
- $ref: '#/components/schemas/GreedySamplingStrategy'
|
||||
- $ref: '#/components/schemas/TopPSamplingStrategy'
|
||||
- $ref: '#/components/schemas/TopKSamplingStrategy'
|
||||
discriminator:
|
||||
propertyName: type
|
||||
mapping:
|
||||
greedy: '#/components/schemas/GreedySamplingStrategy'
|
||||
top_p: '#/components/schemas/TopPSamplingStrategy'
|
||||
top_k: '#/components/schemas/TopKSamplingStrategy'
|
||||
description: The sampling strategy.
|
||||
max_tokens:
|
||||
type: integer
|
||||
|
@ -4684,17 +4624,6 @@ components:
|
|||
- strategy
|
||||
title: SamplingParams
|
||||
description: Sampling parameters.
|
||||
SamplingStrategy:
|
||||
oneOf:
|
||||
- $ref: '#/components/schemas/GreedySamplingStrategy'
|
||||
- $ref: '#/components/schemas/TopPSamplingStrategy'
|
||||
- $ref: '#/components/schemas/TopKSamplingStrategy'
|
||||
discriminator:
|
||||
propertyName: type
|
||||
mapping:
|
||||
greedy: '#/components/schemas/GreedySamplingStrategy'
|
||||
top_p: '#/components/schemas/TopPSamplingStrategy'
|
||||
top_k: '#/components/schemas/TopKSamplingStrategy'
|
||||
SystemMessage:
|
||||
type: object
|
||||
properties:
|
||||
|
@ -5141,7 +5070,16 @@ components:
|
|||
- progress
|
||||
description: Type of the event
|
||||
delta:
|
||||
$ref: '#/components/schemas/ContentDelta'
|
||||
oneOf:
|
||||
- $ref: '#/components/schemas/TextDelta'
|
||||
- $ref: '#/components/schemas/ImageDelta'
|
||||
- $ref: '#/components/schemas/ToolCallDelta'
|
||||
discriminator:
|
||||
propertyName: type
|
||||
mapping:
|
||||
text: '#/components/schemas/TextDelta'
|
||||
image: '#/components/schemas/ImageDelta'
|
||||
tool_call: '#/components/schemas/ToolCallDelta'
|
||||
description: >-
|
||||
Content generated since last event. This can be one or more tokens, or
|
||||
a tool call.
|
||||
|
@ -5184,17 +5122,6 @@ components:
|
|||
title: ChatCompletionResponseStreamChunk
|
||||
description: >-
|
||||
A chunk of a streamed chat completion response.
|
||||
ContentDelta:
|
||||
oneOf:
|
||||
- $ref: '#/components/schemas/TextDelta'
|
||||
- $ref: '#/components/schemas/ImageDelta'
|
||||
- $ref: '#/components/schemas/ToolCallDelta'
|
||||
discriminator:
|
||||
propertyName: type
|
||||
mapping:
|
||||
text: '#/components/schemas/TextDelta'
|
||||
image: '#/components/schemas/ImageDelta'
|
||||
tool_call: '#/components/schemas/ToolCallDelta'
|
||||
ImageDelta:
|
||||
type: object
|
||||
properties:
|
||||
|
@ -5876,7 +5803,22 @@ components:
|
|||
type: object
|
||||
properties:
|
||||
payload:
|
||||
$ref: '#/components/schemas/AgentTurnResponseEventPayload'
|
||||
oneOf:
|
||||
- $ref: '#/components/schemas/AgentTurnResponseStepStartPayload'
|
||||
- $ref: '#/components/schemas/AgentTurnResponseStepProgressPayload'
|
||||
- $ref: '#/components/schemas/AgentTurnResponseStepCompletePayload'
|
||||
- $ref: '#/components/schemas/AgentTurnResponseTurnStartPayload'
|
||||
- $ref: '#/components/schemas/AgentTurnResponseTurnCompletePayload'
|
||||
- $ref: '#/components/schemas/AgentTurnResponseTurnAwaitingInputPayload'
|
||||
discriminator:
|
||||
propertyName: event_type
|
||||
mapping:
|
||||
step_start: '#/components/schemas/AgentTurnResponseStepStartPayload'
|
||||
step_progress: '#/components/schemas/AgentTurnResponseStepProgressPayload'
|
||||
step_complete: '#/components/schemas/AgentTurnResponseStepCompletePayload'
|
||||
turn_start: '#/components/schemas/AgentTurnResponseTurnStartPayload'
|
||||
turn_complete: '#/components/schemas/AgentTurnResponseTurnCompletePayload'
|
||||
turn_awaiting_input: '#/components/schemas/AgentTurnResponseTurnAwaitingInputPayload'
|
||||
description: >-
|
||||
Event-specific payload containing event data
|
||||
additionalProperties: false
|
||||
|
@ -5885,23 +5827,6 @@ components:
|
|||
title: AgentTurnResponseEvent
|
||||
description: >-
|
||||
An event in an agent turn response stream.
|
||||
AgentTurnResponseEventPayload:
|
||||
oneOf:
|
||||
- $ref: '#/components/schemas/AgentTurnResponseStepStartPayload'
|
||||
- $ref: '#/components/schemas/AgentTurnResponseStepProgressPayload'
|
||||
- $ref: '#/components/schemas/AgentTurnResponseStepCompletePayload'
|
||||
- $ref: '#/components/schemas/AgentTurnResponseTurnStartPayload'
|
||||
- $ref: '#/components/schemas/AgentTurnResponseTurnCompletePayload'
|
||||
- $ref: '#/components/schemas/AgentTurnResponseTurnAwaitingInputPayload'
|
||||
discriminator:
|
||||
propertyName: event_type
|
||||
mapping:
|
||||
step_start: '#/components/schemas/AgentTurnResponseStepStartPayload'
|
||||
step_progress: '#/components/schemas/AgentTurnResponseStepProgressPayload'
|
||||
step_complete: '#/components/schemas/AgentTurnResponseStepCompletePayload'
|
||||
turn_start: '#/components/schemas/AgentTurnResponseTurnStartPayload'
|
||||
turn_complete: '#/components/schemas/AgentTurnResponseTurnCompletePayload'
|
||||
turn_awaiting_input: '#/components/schemas/AgentTurnResponseTurnAwaitingInputPayload'
|
||||
AgentTurnResponseStepCompletePayload:
|
||||
type: object
|
||||
properties:
|
||||
|
@ -5980,7 +5905,16 @@ components:
|
|||
description: >-
|
||||
Unique identifier for the step within a turn
|
||||
delta:
|
||||
$ref: '#/components/schemas/ContentDelta'
|
||||
oneOf:
|
||||
- $ref: '#/components/schemas/TextDelta'
|
||||
- $ref: '#/components/schemas/ImageDelta'
|
||||
- $ref: '#/components/schemas/ToolCallDelta'
|
||||
discriminator:
|
||||
propertyName: type
|
||||
mapping:
|
||||
text: '#/components/schemas/TextDelta'
|
||||
image: '#/components/schemas/ImageDelta'
|
||||
tool_call: '#/components/schemas/ToolCallDelta'
|
||||
description: >-
|
||||
Incremental content changes during step execution
|
||||
additionalProperties: false
|
||||
|
@ -6968,10 +6902,6 @@ components:
|
|||
type: string
|
||||
description: >-
|
||||
(Optional) Truncation strategy applied to the response
|
||||
user:
|
||||
type: string
|
||||
description: >-
|
||||
(Optional) User identifier associated with the request
|
||||
additionalProperties: false
|
||||
required:
|
||||
- created_at
|
||||
|
@ -7104,15 +7034,6 @@ components:
|
|||
title: OpenAIResponseOutputMessageMCPListTools
|
||||
description: >-
|
||||
MCP list tools output message containing available tools from an MCP server.
|
||||
OpenAIResponseContentPart:
|
||||
oneOf:
|
||||
- $ref: '#/components/schemas/OpenAIResponseContentPartOutputText'
|
||||
- $ref: '#/components/schemas/OpenAIResponseContentPartRefusal'
|
||||
discriminator:
|
||||
propertyName: type
|
||||
mapping:
|
||||
output_text: '#/components/schemas/OpenAIResponseContentPartOutputText'
|
||||
refusal: '#/components/schemas/OpenAIResponseContentPartRefusal'
|
||||
OpenAIResponseContentPartOutputText:
|
||||
type: object
|
||||
properties:
|
||||
|
@ -7220,7 +7141,14 @@ components:
|
|||
description: >-
|
||||
Unique identifier of the output item containing this content part
|
||||
part:
|
||||
$ref: '#/components/schemas/OpenAIResponseContentPart'
|
||||
oneOf:
|
||||
- $ref: '#/components/schemas/OpenAIResponseContentPartOutputText'
|
||||
- $ref: '#/components/schemas/OpenAIResponseContentPartRefusal'
|
||||
discriminator:
|
||||
propertyName: type
|
||||
mapping:
|
||||
output_text: '#/components/schemas/OpenAIResponseContentPartOutputText'
|
||||
refusal: '#/components/schemas/OpenAIResponseContentPartRefusal'
|
||||
description: The content part that was added
|
||||
sequence_number:
|
||||
type: integer
|
||||
|
@ -7255,7 +7183,14 @@ components:
|
|||
description: >-
|
||||
Unique identifier of the output item containing this content part
|
||||
part:
|
||||
$ref: '#/components/schemas/OpenAIResponseContentPart'
|
||||
oneOf:
|
||||
- $ref: '#/components/schemas/OpenAIResponseContentPartOutputText'
|
||||
- $ref: '#/components/schemas/OpenAIResponseContentPartRefusal'
|
||||
discriminator:
|
||||
propertyName: type
|
||||
mapping:
|
||||
output_text: '#/components/schemas/OpenAIResponseContentPartOutputText'
|
||||
refusal: '#/components/schemas/OpenAIResponseContentPartRefusal'
|
||||
description: The completed content part
|
||||
sequence_number:
|
||||
type: integer
|
||||
|
@ -7541,7 +7476,22 @@ components:
|
|||
description: >-
|
||||
Unique identifier of the response containing this output
|
||||
item:
|
||||
$ref: '#/components/schemas/OpenAIResponseOutput'
|
||||
oneOf:
|
||||
- $ref: '#/components/schemas/OpenAIResponseMessage'
|
||||
- $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
|
||||
- $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall'
|
||||
- $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
|
||||
- $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
|
||||
- $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
|
||||
discriminator:
|
||||
propertyName: type
|
||||
mapping:
|
||||
message: '#/components/schemas/OpenAIResponseMessage'
|
||||
web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
|
||||
file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall'
|
||||
function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
|
||||
mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
|
||||
mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
|
||||
description: >-
|
||||
The output item that was added (message, tool call, etc.)
|
||||
output_index:
|
||||
|
@ -7577,7 +7527,22 @@ components:
|
|||
description: >-
|
||||
Unique identifier of the response containing this output
|
||||
item:
|
||||
$ref: '#/components/schemas/OpenAIResponseOutput'
|
||||
oneOf:
|
||||
- $ref: '#/components/schemas/OpenAIResponseMessage'
|
||||
- $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
|
||||
- $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall'
|
||||
- $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
|
||||
- $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
|
||||
- $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
|
||||
discriminator:
|
||||
propertyName: type
|
||||
mapping:
|
||||
message: '#/components/schemas/OpenAIResponseMessage'
|
||||
web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
|
||||
file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall'
|
||||
function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
|
||||
mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
|
||||
mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
|
||||
description: >-
|
||||
The completed output item (message, tool call, etc.)
|
||||
output_index:
|
||||
|
@ -7846,72 +7811,6 @@ components:
|
|||
title: OpenAIDeleteResponseObject
|
||||
description: >-
|
||||
Response object confirming deletion of an OpenAI response.
|
||||
EmbeddingsRequest:
|
||||
type: object
|
||||
properties:
|
||||
model_id:
|
||||
type: string
|
||||
description: >-
|
||||
The identifier of the model to use. The model must be an embedding model
|
||||
registered with Llama Stack and available via the /models endpoint.
|
||||
contents:
|
||||
oneOf:
|
||||
- type: array
|
||||
items:
|
||||
type: string
|
||||
- type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/InterleavedContentItem'
|
||||
description: >-
|
||||
List of contents to generate embeddings for. Each content can be a string
|
||||
or an InterleavedContentItem (and hence can be multimodal). The behavior
|
||||
depends on the model and provider. Some models may only support text.
|
||||
text_truncation:
|
||||
type: string
|
||||
enum:
|
||||
- none
|
||||
- start
|
||||
- end
|
||||
description: >-
|
||||
(Optional) Config for how to truncate text for embedding when text is
|
||||
longer than the model's max sequence length.
|
||||
output_dimension:
|
||||
type: integer
|
||||
description: >-
|
||||
(Optional) Output dimensionality for the embeddings. Only supported by
|
||||
Matryoshka models.
|
||||
task_type:
|
||||
type: string
|
||||
enum:
|
||||
- query
|
||||
- document
|
||||
description: >-
|
||||
(Optional) How is the embedding being used? This is only supported by
|
||||
asymmetric embedding models.
|
||||
additionalProperties: false
|
||||
required:
|
||||
- model_id
|
||||
- contents
|
||||
title: EmbeddingsRequest
|
||||
EmbeddingsResponse:
|
||||
type: object
|
||||
properties:
|
||||
embeddings:
|
||||
type: array
|
||||
items:
|
||||
type: array
|
||||
items:
|
||||
type: number
|
||||
description: >-
|
||||
List of embedding vectors, one per input content. Each embedding is a
|
||||
list of floats. The dimensionality of the embedding is model-specific;
|
||||
you can check model metadata using /models/{model_id}
|
||||
additionalProperties: false
|
||||
required:
|
||||
- embeddings
|
||||
title: EmbeddingsResponse
|
||||
description: >-
|
||||
Response containing generated embeddings.
|
||||
AgentCandidate:
|
||||
type: object
|
||||
properties:
|
||||
|
@ -7966,7 +7865,14 @@ components:
|
|||
type: object
|
||||
properties:
|
||||
eval_candidate:
|
||||
$ref: '#/components/schemas/EvalCandidate'
|
||||
oneOf:
|
||||
- $ref: '#/components/schemas/ModelCandidate'
|
||||
- $ref: '#/components/schemas/AgentCandidate'
|
||||
discriminator:
|
||||
propertyName: type
|
||||
mapping:
|
||||
model: '#/components/schemas/ModelCandidate'
|
||||
agent: '#/components/schemas/AgentCandidate'
|
||||
description: The candidate to evaluate.
|
||||
scoring_params:
|
||||
type: object
|
||||
|
@ -7987,15 +7893,6 @@ components:
|
|||
title: BenchmarkConfig
|
||||
description: >-
|
||||
A benchmark configuration for evaluation.
|
||||
EvalCandidate:
|
||||
oneOf:
|
||||
- $ref: '#/components/schemas/ModelCandidate'
|
||||
- $ref: '#/components/schemas/AgentCandidate'
|
||||
discriminator:
|
||||
propertyName: type
|
||||
mapping:
|
||||
model: '#/components/schemas/ModelCandidate'
|
||||
agent: '#/components/schemas/AgentCandidate'
|
||||
LLMAsJudgeScoringFnParams:
|
||||
type: object
|
||||
properties:
|
||||
|
@ -8459,7 +8356,20 @@ components:
|
|||
type: object
|
||||
properties:
|
||||
message:
|
||||
$ref: '#/components/schemas/OpenAIMessageParam'
|
||||
oneOf:
|
||||
- $ref: '#/components/schemas/OpenAIUserMessageParam'
|
||||
- $ref: '#/components/schemas/OpenAISystemMessageParam'
|
||||
- $ref: '#/components/schemas/OpenAIAssistantMessageParam'
|
||||
- $ref: '#/components/schemas/OpenAIToolMessageParam'
|
||||
- $ref: '#/components/schemas/OpenAIDeveloperMessageParam'
|
||||
discriminator:
|
||||
propertyName: role
|
||||
mapping:
|
||||
user: '#/components/schemas/OpenAIUserMessageParam'
|
||||
system: '#/components/schemas/OpenAISystemMessageParam'
|
||||
assistant: '#/components/schemas/OpenAIAssistantMessageParam'
|
||||
tool: '#/components/schemas/OpenAIToolMessageParam'
|
||||
developer: '#/components/schemas/OpenAIDeveloperMessageParam'
|
||||
description: The message from the model
|
||||
finish_reason:
|
||||
type: string
|
||||
|
@ -8752,15 +8662,6 @@ components:
|
|||
- model
|
||||
- input_messages
|
||||
title: OpenAICompletionWithInputMessages
|
||||
DataSource:
|
||||
oneOf:
|
||||
- $ref: '#/components/schemas/URIDataSource'
|
||||
- $ref: '#/components/schemas/RowsDataSource'
|
||||
discriminator:
|
||||
propertyName: type
|
||||
mapping:
|
||||
uri: '#/components/schemas/URIDataSource'
|
||||
rows: '#/components/schemas/RowsDataSource'
|
||||
Dataset:
|
||||
type: object
|
||||
properties:
|
||||
|
@ -8795,7 +8696,14 @@ components:
|
|||
description: >-
|
||||
Purpose of the dataset indicating its intended use
|
||||
source:
|
||||
$ref: '#/components/schemas/DataSource'
|
||||
oneOf:
|
||||
- $ref: '#/components/schemas/URIDataSource'
|
||||
- $ref: '#/components/schemas/RowsDataSource'
|
||||
discriminator:
|
||||
propertyName: type
|
||||
mapping:
|
||||
uri: '#/components/schemas/URIDataSource'
|
||||
rows: '#/components/schemas/RowsDataSource'
|
||||
description: >-
|
||||
Data source configuration for the dataset
|
||||
metadata:
|
||||
|
@ -9041,31 +8949,6 @@ components:
|
|||
- type
|
||||
title: ObjectType
|
||||
description: Parameter type for object values.
|
||||
ParamType:
|
||||
oneOf:
|
||||
- $ref: '#/components/schemas/StringType'
|
||||
- $ref: '#/components/schemas/NumberType'
|
||||
- $ref: '#/components/schemas/BooleanType'
|
||||
- $ref: '#/components/schemas/ArrayType'
|
||||
- $ref: '#/components/schemas/ObjectType'
|
||||
- $ref: '#/components/schemas/JsonType'
|
||||
- $ref: '#/components/schemas/UnionType'
|
||||
- $ref: '#/components/schemas/ChatCompletionInputType'
|
||||
- $ref: '#/components/schemas/CompletionInputType'
|
||||
- $ref: '#/components/schemas/AgentTurnInputType'
|
||||
discriminator:
|
||||
propertyName: type
|
||||
mapping:
|
||||
string: '#/components/schemas/StringType'
|
||||
number: '#/components/schemas/NumberType'
|
||||
boolean: '#/components/schemas/BooleanType'
|
||||
array: '#/components/schemas/ArrayType'
|
||||
object: '#/components/schemas/ObjectType'
|
||||
json: '#/components/schemas/JsonType'
|
||||
union: '#/components/schemas/UnionType'
|
||||
chat_completion_input: '#/components/schemas/ChatCompletionInputType'
|
||||
completion_input: '#/components/schemas/CompletionInputType'
|
||||
agent_turn_input: '#/components/schemas/AgentTurnInputType'
|
||||
ScoringFn:
|
||||
type: object
|
||||
properties:
|
||||
|
@ -9104,7 +8987,30 @@ components:
|
|||
- type: array
|
||||
- type: object
|
||||
return_type:
|
||||
$ref: '#/components/schemas/ParamType'
|
||||
oneOf:
|
||||
- $ref: '#/components/schemas/StringType'
|
||||
- $ref: '#/components/schemas/NumberType'
|
||||
- $ref: '#/components/schemas/BooleanType'
|
||||
- $ref: '#/components/schemas/ArrayType'
|
||||
- $ref: '#/components/schemas/ObjectType'
|
||||
- $ref: '#/components/schemas/JsonType'
|
||||
- $ref: '#/components/schemas/UnionType'
|
||||
- $ref: '#/components/schemas/ChatCompletionInputType'
|
||||
- $ref: '#/components/schemas/CompletionInputType'
|
||||
- $ref: '#/components/schemas/AgentTurnInputType'
|
||||
discriminator:
|
||||
propertyName: type
|
||||
mapping:
|
||||
string: '#/components/schemas/StringType'
|
||||
number: '#/components/schemas/NumberType'
|
||||
boolean: '#/components/schemas/BooleanType'
|
||||
array: '#/components/schemas/ArrayType'
|
||||
object: '#/components/schemas/ObjectType'
|
||||
json: '#/components/schemas/JsonType'
|
||||
union: '#/components/schemas/UnionType'
|
||||
chat_completion_input: '#/components/schemas/ChatCompletionInputType'
|
||||
completion_input: '#/components/schemas/CompletionInputType'
|
||||
agent_turn_input: '#/components/schemas/AgentTurnInputType'
|
||||
params:
|
||||
$ref: '#/components/schemas/ScoringFnParams'
|
||||
additionalProperties: false
|
||||
|
@ -10234,10 +10140,6 @@ components:
|
|||
type: string
|
||||
description: >-
|
||||
(Optional) Truncation strategy applied to the response
|
||||
user:
|
||||
type: string
|
||||
description: >-
|
||||
(Optional) User identifier associated with the request
|
||||
input:
|
||||
type: array
|
||||
items:
|
||||
|
@ -10560,7 +10462,14 @@ components:
|
|||
description: >-
|
||||
Event type identifier set to STRUCTURED_LOG
|
||||
payload:
|
||||
$ref: '#/components/schemas/StructuredLogPayload'
|
||||
oneOf:
|
||||
- $ref: '#/components/schemas/SpanStartPayload'
|
||||
- $ref: '#/components/schemas/SpanEndPayload'
|
||||
discriminator:
|
||||
propertyName: type
|
||||
mapping:
|
||||
span_start: '#/components/schemas/SpanStartPayload'
|
||||
span_end: '#/components/schemas/SpanEndPayload'
|
||||
description: >-
|
||||
The structured payload data for the log event
|
||||
additionalProperties: false
|
||||
|
@ -10573,15 +10482,6 @@ components:
|
|||
title: StructuredLogEvent
|
||||
description: >-
|
||||
A structured log event containing typed payload data.
|
||||
StructuredLogPayload:
|
||||
oneOf:
|
||||
- $ref: '#/components/schemas/SpanStartPayload'
|
||||
- $ref: '#/components/schemas/SpanEndPayload'
|
||||
discriminator:
|
||||
propertyName: type
|
||||
mapping:
|
||||
span_start: '#/components/schemas/SpanStartPayload'
|
||||
span_end: '#/components/schemas/SpanEndPayload'
|
||||
StructuredLogType:
|
||||
type: string
|
||||
enum:
|
||||
|
@ -10790,7 +10690,14 @@ components:
|
|||
description: >-
|
||||
Key-value attributes associated with the file
|
||||
chunking_strategy:
|
||||
$ref: '#/components/schemas/VectorStoreChunkingStrategy'
|
||||
oneOf:
|
||||
- $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto'
|
||||
- $ref: '#/components/schemas/VectorStoreChunkingStrategyStatic'
|
||||
discriminator:
|
||||
propertyName: type
|
||||
mapping:
|
||||
auto: '#/components/schemas/VectorStoreChunkingStrategyAuto'
|
||||
static: '#/components/schemas/VectorStoreChunkingStrategyStatic'
|
||||
description: >-
|
||||
Strategy used for splitting the file into chunks
|
||||
created_at:
|
||||
|
@ -11805,38 +11712,6 @@ components:
|
|||
title: VectorStoreListFilesResponse
|
||||
description: >-
|
||||
Response from listing files in a vector store.
|
||||
OpenAIModel:
|
||||
type: object
|
||||
properties:
|
||||
id:
|
||||
type: string
|
||||
object:
|
||||
type: string
|
||||
const: model
|
||||
default: model
|
||||
created:
|
||||
type: integer
|
||||
owned_by:
|
||||
type: string
|
||||
additionalProperties: false
|
||||
required:
|
||||
- id
|
||||
- object
|
||||
- created
|
||||
- owned_by
|
||||
title: OpenAIModel
|
||||
description: A model from OpenAI.
|
||||
OpenAIListModelsResponse:
|
||||
type: object
|
||||
properties:
|
||||
data:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/OpenAIModel'
|
||||
additionalProperties: false
|
||||
required:
|
||||
- data
|
||||
title: OpenAIListModelsResponse
|
||||
VectorStoreListResponse:
|
||||
type: object
|
||||
properties:
|
||||
|
@ -12102,6 +11977,25 @@ components:
|
|||
required:
|
||||
- attributes
|
||||
title: OpenaiUpdateVectorStoreFileRequest
|
||||
ExpiresAfter:
|
||||
type: object
|
||||
properties:
|
||||
anchor:
|
||||
type: string
|
||||
const: created_at
|
||||
seconds:
|
||||
type: integer
|
||||
additionalProperties: false
|
||||
required:
|
||||
- anchor
|
||||
- seconds
|
||||
title: ExpiresAfter
|
||||
description: >-
|
||||
Control expiration of uploaded files.
|
||||
|
||||
Params:
|
||||
- anchor, must be "created_at"
|
||||
- seconds, must be int between 3600 and 2592000 (1 hour to 30 days)
|
||||
DPOAlignmentConfig:
|
||||
type: object
|
||||
properties:
|
||||
|
@ -12387,7 +12281,14 @@ components:
|
|||
type: object
|
||||
properties:
|
||||
query_generator_config:
|
||||
$ref: '#/components/schemas/RAGQueryGeneratorConfig'
|
||||
oneOf:
|
||||
- $ref: '#/components/schemas/DefaultRAGQueryGeneratorConfig'
|
||||
- $ref: '#/components/schemas/LLMRAGQueryGeneratorConfig'
|
||||
discriminator:
|
||||
propertyName: type
|
||||
mapping:
|
||||
default: '#/components/schemas/DefaultRAGQueryGeneratorConfig'
|
||||
llm: '#/components/schemas/LLMRAGQueryGeneratorConfig'
|
||||
description: Configuration for the query generator.
|
||||
max_tokens_in_context:
|
||||
type: integer
|
||||
|
@ -12430,15 +12331,6 @@ components:
|
|||
title: RAGQueryConfig
|
||||
description: >-
|
||||
Configuration for the RAG query generation.
|
||||
RAGQueryGeneratorConfig:
|
||||
oneOf:
|
||||
- $ref: '#/components/schemas/DefaultRAGQueryGeneratorConfig'
|
||||
- $ref: '#/components/schemas/LLMRAGQueryGeneratorConfig'
|
||||
discriminator:
|
||||
propertyName: type
|
||||
mapping:
|
||||
default: '#/components/schemas/DefaultRAGQueryGeneratorConfig'
|
||||
llm: '#/components/schemas/LLMRAGQueryGeneratorConfig'
|
||||
RAGSearchMode:
|
||||
type: string
|
||||
enum:
|
||||
|
@ -12874,6 +12766,15 @@ components:
|
|||
- dataset_id
|
||||
- scoring_functions
|
||||
title: RegisterBenchmarkRequest
|
||||
DataSource:
|
||||
oneOf:
|
||||
- $ref: '#/components/schemas/URIDataSource'
|
||||
- $ref: '#/components/schemas/RowsDataSource'
|
||||
discriminator:
|
||||
propertyName: type
|
||||
mapping:
|
||||
uri: '#/components/schemas/URIDataSource'
|
||||
rows: '#/components/schemas/RowsDataSource'
|
||||
RegisterDatasetRequest:
|
||||
type: object
|
||||
properties:
|
||||
|
@ -12958,6 +12859,31 @@ components:
|
|||
required:
|
||||
- model_id
|
||||
title: RegisterModelRequest
|
||||
ParamType:
|
||||
oneOf:
|
||||
- $ref: '#/components/schemas/StringType'
|
||||
- $ref: '#/components/schemas/NumberType'
|
||||
- $ref: '#/components/schemas/BooleanType'
|
||||
- $ref: '#/components/schemas/ArrayType'
|
||||
- $ref: '#/components/schemas/ObjectType'
|
||||
- $ref: '#/components/schemas/JsonType'
|
||||
- $ref: '#/components/schemas/UnionType'
|
||||
- $ref: '#/components/schemas/ChatCompletionInputType'
|
||||
- $ref: '#/components/schemas/CompletionInputType'
|
||||
- $ref: '#/components/schemas/AgentTurnInputType'
|
||||
discriminator:
|
||||
propertyName: type
|
||||
mapping:
|
||||
string: '#/components/schemas/StringType'
|
||||
number: '#/components/schemas/NumberType'
|
||||
boolean: '#/components/schemas/BooleanType'
|
||||
array: '#/components/schemas/ArrayType'
|
||||
object: '#/components/schemas/ObjectType'
|
||||
json: '#/components/schemas/JsonType'
|
||||
union: '#/components/schemas/UnionType'
|
||||
chat_completion_input: '#/components/schemas/ChatCompletionInputType'
|
||||
completion_input: '#/components/schemas/CompletionInputType'
|
||||
agent_turn_input: '#/components/schemas/AgentTurnInputType'
|
||||
RegisterScoringFunctionRequest:
|
||||
type: object
|
||||
properties:
|
||||
|
|
|
@ -694,7 +694,7 @@ class Agents(Protocol):
|
|||
#
|
||||
# Both of these APIs are inherently stateful.
|
||||
|
||||
@webmethod(route="/openai/v1/responses/{response_id}", method="GET", level=LLAMA_STACK_API_V1)
|
||||
@webmethod(route="/responses/{response_id}", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def get_openai_response(
|
||||
self,
|
||||
response_id: str,
|
||||
|
@ -706,7 +706,7 @@ class Agents(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/openai/v1/responses", method="POST", level=LLAMA_STACK_API_V1)
|
||||
@webmethod(route="/responses", method="POST", level=LLAMA_STACK_API_V1)
|
||||
async def create_openai_response(
|
||||
self,
|
||||
input: str | list[OpenAIResponseInput],
|
||||
|
@ -731,7 +731,7 @@ class Agents(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/openai/v1/responses", method="GET", level=LLAMA_STACK_API_V1)
|
||||
@webmethod(route="/responses", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def list_openai_responses(
|
||||
self,
|
||||
after: str | None = None,
|
||||
|
@ -749,7 +749,7 @@ class Agents(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/openai/v1/responses/{response_id}/input_items", method="GET", level=LLAMA_STACK_API_V1)
|
||||
@webmethod(route="/responses/{response_id}/input_items", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def list_openai_response_input_items(
|
||||
self,
|
||||
response_id: str,
|
||||
|
@ -771,7 +771,7 @@ class Agents(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/openai/v1/responses/{response_id}", method="DELETE", level=LLAMA_STACK_API_V1)
|
||||
@webmethod(route="/responses/{response_id}", method="DELETE", level=LLAMA_STACK_API_V1)
|
||||
async def delete_openai_response(self, response_id: str) -> OpenAIDeleteResponseObject:
|
||||
"""Delete an OpenAI response by its ID.
|
||||
|
||||
|
|
|
@ -363,7 +363,6 @@ class OpenAIResponseObject(BaseModel):
|
|||
:param text: Text formatting configuration for the response
|
||||
:param top_p: (Optional) Nucleus sampling parameter used for generation
|
||||
:param truncation: (Optional) Truncation strategy applied to the response
|
||||
:param user: (Optional) User identifier associated with the request
|
||||
"""
|
||||
|
||||
created_at: int
|
||||
|
@ -381,7 +380,6 @@ class OpenAIResponseObject(BaseModel):
|
|||
text: OpenAIResponseText = OpenAIResponseText(format=OpenAIResponseTextFormat(type="text"))
|
||||
top_p: float | None = None
|
||||
truncation: str | None = None
|
||||
user: str | None = None
|
||||
|
||||
|
||||
@json_schema_type
|
||||
|
|
|
@ -43,7 +43,7 @@ class Batches(Protocol):
|
|||
Note: This API is currently under active development and may undergo changes.
|
||||
"""
|
||||
|
||||
@webmethod(route="/openai/v1/batches", method="POST", level=LLAMA_STACK_API_V1)
|
||||
@webmethod(route="/batches", method="POST", level=LLAMA_STACK_API_V1)
|
||||
async def create_batch(
|
||||
self,
|
||||
input_file_id: str,
|
||||
|
@ -63,7 +63,7 @@ class Batches(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/openai/v1/batches/{batch_id}", method="GET", level=LLAMA_STACK_API_V1)
|
||||
@webmethod(route="/batches/{batch_id}", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def retrieve_batch(self, batch_id: str) -> BatchObject:
|
||||
"""Retrieve information about a specific batch.
|
||||
|
||||
|
@ -72,7 +72,7 @@ class Batches(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/openai/v1/batches/{batch_id}/cancel", method="POST", level=LLAMA_STACK_API_V1)
|
||||
@webmethod(route="/batches/{batch_id}/cancel", method="POST", level=LLAMA_STACK_API_V1)
|
||||
async def cancel_batch(self, batch_id: str) -> BatchObject:
|
||||
"""Cancel a batch that is in progress.
|
||||
|
||||
|
@ -81,7 +81,7 @@ class Batches(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/openai/v1/batches", method="GET", level=LLAMA_STACK_API_V1)
|
||||
@webmethod(route="/batches", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def list_batches(
|
||||
self,
|
||||
after: str | None = None,
|
||||
|
|
|
@ -105,14 +105,12 @@ class OpenAIFileDeleteResponse(BaseModel):
|
|||
@trace_protocol
|
||||
class Files(Protocol):
|
||||
# OpenAI Files API Endpoints
|
||||
@webmethod(route="/openai/v1/files", method="POST", level=LLAMA_STACK_API_V1)
|
||||
@webmethod(route="/files", method="POST", level=LLAMA_STACK_API_V1)
|
||||
async def openai_upload_file(
|
||||
self,
|
||||
file: Annotated[UploadFile, File()],
|
||||
purpose: Annotated[OpenAIFilePurpose, Form()],
|
||||
expires_after_anchor: Annotated[str | None, Form(alias="expires_after[anchor]")] = None,
|
||||
expires_after_seconds: Annotated[int | None, Form(alias="expires_after[seconds]")] = None,
|
||||
# TODO: expires_after is producing strange openapi spec, params are showing up as a required w/ oneOf being null
|
||||
expires_after: Annotated[ExpiresAfter | None, Form()] = None,
|
||||
) -> OpenAIFileObject:
|
||||
"""
|
||||
Upload a file that can be used across various endpoints.
|
||||
|
@ -120,15 +118,16 @@ class Files(Protocol):
|
|||
The file upload should be a multipart form request with:
|
||||
- file: The File object (not file name) to be uploaded.
|
||||
- purpose: The intended purpose of the uploaded file.
|
||||
- expires_after: Optional form values describing expiration for the file. Expected expires_after[anchor] = "created_at", expires_after[seconds] = {integer}. Seconds must be between 3600 and 2592000 (1 hour to 30 days).
|
||||
- expires_after: Optional form values describing expiration for the file.
|
||||
|
||||
:param file: The uploaded file object containing content and metadata (filename, content_type, etc.).
|
||||
:param purpose: The intended purpose of the uploaded file (e.g., "assistants", "fine-tune").
|
||||
:param expires_after: Optional form values describing expiration for the file.
|
||||
:returns: An OpenAIFileObject representing the uploaded file.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/openai/v1/files", method="GET", level=LLAMA_STACK_API_V1)
|
||||
@webmethod(route="/files", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def openai_list_files(
|
||||
self,
|
||||
after: str | None = None,
|
||||
|
@ -147,7 +146,7 @@ class Files(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/openai/v1/files/{file_id}", method="GET", level=LLAMA_STACK_API_V1)
|
||||
@webmethod(route="/files/{file_id}", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def openai_retrieve_file(
|
||||
self,
|
||||
file_id: str,
|
||||
|
@ -160,7 +159,7 @@ class Files(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/openai/v1/files/{file_id}", method="DELETE", level=LLAMA_STACK_API_V1)
|
||||
@webmethod(route="/files/{file_id}", method="DELETE", level=LLAMA_STACK_API_V1)
|
||||
async def openai_delete_file(
|
||||
self,
|
||||
file_id: str,
|
||||
|
@ -173,7 +172,7 @@ class Files(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/openai/v1/files/{file_id}/content", method="GET", level=LLAMA_STACK_API_V1)
|
||||
@webmethod(route="/files/{file_id}/content", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def openai_retrieve_file_content(
|
||||
self,
|
||||
file_id: str,
|
||||
|
|
|
@ -17,11 +17,11 @@ from typing import (
|
|||
from pydantic import BaseModel, Field, field_validator
|
||||
from typing_extensions import TypedDict
|
||||
|
||||
from llama_stack.apis.common.content_types import ContentDelta, InterleavedContent, InterleavedContentItem
|
||||
from llama_stack.apis.common.content_types import ContentDelta, InterleavedContent
|
||||
from llama_stack.apis.common.responses import Order
|
||||
from llama_stack.apis.models import Model
|
||||
from llama_stack.apis.telemetry import MetricResponseMixin
|
||||
from llama_stack.apis.version import LLAMA_STACK_API_V1
|
||||
from llama_stack.apis.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1ALPHA
|
||||
from llama_stack.models.llama.datatypes import (
|
||||
BuiltinTool,
|
||||
StopReason,
|
||||
|
@ -1070,27 +1070,7 @@ class InferenceProvider(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/inference/embeddings", method="POST", level=LLAMA_STACK_API_V1)
|
||||
async def embeddings(
|
||||
self,
|
||||
model_id: str,
|
||||
contents: list[str] | list[InterleavedContentItem],
|
||||
text_truncation: TextTruncation | None = TextTruncation.none,
|
||||
output_dimension: int | None = None,
|
||||
task_type: EmbeddingTaskType | None = None,
|
||||
) -> EmbeddingsResponse:
|
||||
"""Generate embeddings for content pieces using the specified model.
|
||||
|
||||
:param model_id: The identifier of the model to use. The model must be an embedding model registered with Llama Stack and available via the /models endpoint.
|
||||
:param contents: List of contents to generate embeddings for. Each content can be a string or an InterleavedContentItem (and hence can be multimodal). The behavior depends on the model and provider. Some models may only support text.
|
||||
:param output_dimension: (Optional) Output dimensionality for the embeddings. Only supported by Matryoshka models.
|
||||
:param text_truncation: (Optional) Config for how to truncate text for embedding when text is longer than the model's max sequence length.
|
||||
:param task_type: (Optional) How is the embedding being used? This is only supported by asymmetric embedding models.
|
||||
:returns: An array of embeddings, one for each content. Each embedding is a list of floats. The dimensionality of the embedding is model-specific; you can check model metadata using /models/{model_id}.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/inference/rerank", method="POST", experimental=True, level=LLAMA_STACK_API_V1)
|
||||
@webmethod(route="/inference/rerank", method="POST", level=LLAMA_STACK_API_V1ALPHA)
|
||||
async def rerank(
|
||||
self,
|
||||
model: str,
|
||||
|
@ -1109,7 +1089,7 @@ class InferenceProvider(Protocol):
|
|||
raise NotImplementedError("Reranking is not implemented")
|
||||
return # this is so mypy's safe-super rule will consider the method concrete
|
||||
|
||||
@webmethod(route="/openai/v1/completions", method="POST", level=LLAMA_STACK_API_V1)
|
||||
@webmethod(route="/completions", method="POST", level=LLAMA_STACK_API_V1)
|
||||
async def openai_completion(
|
||||
self,
|
||||
# Standard OpenAI completion parameters
|
||||
|
@ -1160,7 +1140,7 @@ class InferenceProvider(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/openai/v1/chat/completions", method="POST", level=LLAMA_STACK_API_V1)
|
||||
@webmethod(route="/chat/completions", method="POST", level=LLAMA_STACK_API_V1)
|
||||
async def openai_chat_completion(
|
||||
self,
|
||||
model: str,
|
||||
|
@ -1216,7 +1196,7 @@ class InferenceProvider(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/openai/v1/embeddings", method="POST", level=LLAMA_STACK_API_V1)
|
||||
@webmethod(route="/embeddings", method="POST", level=LLAMA_STACK_API_V1)
|
||||
async def openai_embeddings(
|
||||
self,
|
||||
model: str,
|
||||
|
@ -1245,7 +1225,7 @@ class Inference(InferenceProvider):
|
|||
- Embedding models: these models generate embeddings to be used for semantic search.
|
||||
"""
|
||||
|
||||
@webmethod(route="/openai/v1/chat/completions", method="GET", level=LLAMA_STACK_API_V1)
|
||||
@webmethod(route="/chat/completions", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def list_chat_completions(
|
||||
self,
|
||||
after: str | None = None,
|
||||
|
@ -1263,7 +1243,7 @@ class Inference(InferenceProvider):
|
|||
"""
|
||||
raise NotImplementedError("List chat completions is not implemented")
|
||||
|
||||
@webmethod(route="/openai/v1/chat/completions/{completion_id}", method="GET", level=LLAMA_STACK_API_V1)
|
||||
@webmethod(route="/chat/completions/{completion_id}", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def get_chat_completion(self, completion_id: str) -> OpenAICompletionWithInputMessages:
|
||||
"""Describe a chat completion by its ID.
|
||||
|
||||
|
|
|
@ -111,14 +111,6 @@ class Models(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/openai/v1/models", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def openai_list_models(self) -> OpenAIListModelsResponse:
|
||||
"""List models using the OpenAI API.
|
||||
|
||||
:returns: A OpenAIListModelsResponse.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/models/{model_id:path}", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def get_model(
|
||||
self,
|
||||
|
|
|
@ -114,7 +114,7 @@ class Safety(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/openai/v1/moderations", method="POST", level=LLAMA_STACK_API_V1)
|
||||
@webmethod(route="/moderations", method="POST", level=LLAMA_STACK_API_V1)
|
||||
async def run_moderation(self, input: str | list[str], model: str) -> ModerationObject:
|
||||
"""Classifies if text and/or image inputs are potentially harmful.
|
||||
:param input: Input (or inputs) to classify.
|
||||
|
|
|
@ -473,7 +473,7 @@ class VectorIO(Protocol):
|
|||
...
|
||||
|
||||
# OpenAI Vector Stores API endpoints
|
||||
@webmethod(route="/openai/v1/vector_stores", method="POST", level=LLAMA_STACK_API_V1)
|
||||
@webmethod(route="/vector_stores", method="POST", level=LLAMA_STACK_API_V1)
|
||||
async def openai_create_vector_store(
|
||||
self,
|
||||
name: str | None = None,
|
||||
|
@ -499,7 +499,7 @@ class VectorIO(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/openai/v1/vector_stores", method="GET", level=LLAMA_STACK_API_V1)
|
||||
@webmethod(route="/vector_stores", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def openai_list_vector_stores(
|
||||
self,
|
||||
limit: int | None = 20,
|
||||
|
@ -517,7 +517,7 @@ class VectorIO(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/openai/v1/vector_stores/{vector_store_id}", method="GET", level=LLAMA_STACK_API_V1)
|
||||
@webmethod(route="/vector_stores/{vector_store_id}", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def openai_retrieve_vector_store(
|
||||
self,
|
||||
vector_store_id: str,
|
||||
|
@ -529,7 +529,7 @@ class VectorIO(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/openai/v1/vector_stores/{vector_store_id}", method="POST", level=LLAMA_STACK_API_V1)
|
||||
@webmethod(route="/vector_stores/{vector_store_id}", method="POST", level=LLAMA_STACK_API_V1)
|
||||
async def openai_update_vector_store(
|
||||
self,
|
||||
vector_store_id: str,
|
||||
|
@ -547,7 +547,7 @@ class VectorIO(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/openai/v1/vector_stores/{vector_store_id}", method="DELETE", level=LLAMA_STACK_API_V1)
|
||||
@webmethod(route="/vector_stores/{vector_store_id}", method="DELETE", level=LLAMA_STACK_API_V1)
|
||||
async def openai_delete_vector_store(
|
||||
self,
|
||||
vector_store_id: str,
|
||||
|
@ -559,7 +559,7 @@ class VectorIO(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/openai/v1/vector_stores/{vector_store_id}/search", method="POST", level=LLAMA_STACK_API_V1)
|
||||
@webmethod(route="/vector_stores/{vector_store_id}/search", method="POST", level=LLAMA_STACK_API_V1)
|
||||
async def openai_search_vector_store(
|
||||
self,
|
||||
vector_store_id: str,
|
||||
|
@ -585,7 +585,7 @@ class VectorIO(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/openai/v1/vector_stores/{vector_store_id}/files", method="POST", level=LLAMA_STACK_API_V1)
|
||||
@webmethod(route="/vector_stores/{vector_store_id}/files", method="POST", level=LLAMA_STACK_API_V1)
|
||||
async def openai_attach_file_to_vector_store(
|
||||
self,
|
||||
vector_store_id: str,
|
||||
|
@ -603,7 +603,7 @@ class VectorIO(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/openai/v1/vector_stores/{vector_store_id}/files", method="GET", level=LLAMA_STACK_API_V1)
|
||||
@webmethod(route="/vector_stores/{vector_store_id}/files", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def openai_list_files_in_vector_store(
|
||||
self,
|
||||
vector_store_id: str,
|
||||
|
@ -625,9 +625,7 @@ class VectorIO(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(
|
||||
route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}", method="GET", level=LLAMA_STACK_API_V1
|
||||
)
|
||||
@webmethod(route="/vector_stores/{vector_store_id}/files/{file_id}", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def openai_retrieve_vector_store_file(
|
||||
self,
|
||||
vector_store_id: str,
|
||||
|
@ -642,7 +640,7 @@ class VectorIO(Protocol):
|
|||
...
|
||||
|
||||
@webmethod(
|
||||
route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}/content",
|
||||
route="/vector_stores/{vector_store_id}/files/{file_id}/content",
|
||||
method="GET",
|
||||
level=LLAMA_STACK_API_V1,
|
||||
)
|
||||
|
@ -659,9 +657,7 @@ class VectorIO(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(
|
||||
route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}", method="POST", level=LLAMA_STACK_API_V1
|
||||
)
|
||||
@webmethod(route="/vector_stores/{vector_store_id}/files/{file_id}", method="POST", level=LLAMA_STACK_API_V1)
|
||||
async def openai_update_vector_store_file(
|
||||
self,
|
||||
vector_store_id: str,
|
||||
|
@ -677,9 +673,7 @@ class VectorIO(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(
|
||||
route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}", method="DELETE", level=LLAMA_STACK_API_V1
|
||||
)
|
||||
@webmethod(route="/vector_stores/{vector_store_id}/files/{file_id}", method="DELETE", level=LLAMA_STACK_API_V1)
|
||||
async def openai_delete_vector_store_file(
|
||||
self,
|
||||
vector_store_id: str,
|
||||
|
|
|
@ -433,6 +433,12 @@ class InferenceStoreConfig(BaseModel):
|
|||
num_writers: int = Field(default=4, description="Number of concurrent background writers")
|
||||
|
||||
|
||||
class ResponsesStoreConfig(BaseModel):
|
||||
sql_store_config: SqlStoreConfig
|
||||
max_write_queue_size: int = Field(default=10000, description="Max queued writes for responses store")
|
||||
num_writers: int = Field(default=4, description="Number of concurrent background writers")
|
||||
|
||||
|
||||
class StackRunConfig(BaseModel):
|
||||
version: int = LLAMA_STACK_RUN_CONFIG_VERSION
|
||||
|
||||
|
|
|
@ -29,6 +29,7 @@ from llama_stack.apis.telemetry import Telemetry
|
|||
from llama_stack.apis.tools import ToolGroups, ToolRuntime
|
||||
from llama_stack.apis.vector_dbs import VectorDBs
|
||||
from llama_stack.apis.vector_io import VectorIO
|
||||
from llama_stack.apis.version import LLAMA_STACK_API_V1ALPHA
|
||||
from llama_stack.core.client import get_client_impl
|
||||
from llama_stack.core.datatypes import (
|
||||
AccessRule,
|
||||
|
@ -412,8 +413,14 @@ def check_protocol_compliance(obj: Any, protocol: Any) -> None:
|
|||
|
||||
mro = type(obj).__mro__
|
||||
for name, value in inspect.getmembers(protocol):
|
||||
if inspect.isfunction(value) and hasattr(value, "__webmethod__"):
|
||||
if value.__webmethod__.experimental:
|
||||
if inspect.isfunction(value) and hasattr(value, "__webmethods__"):
|
||||
has_alpha_api = False
|
||||
for webmethod in value.__webmethods__:
|
||||
if webmethod.level == LLAMA_STACK_API_V1ALPHA:
|
||||
has_alpha_api = True
|
||||
break
|
||||
# if this API has multiple webmethods, and one of them is an alpha API, this API should be skipped when checking for missing or not callable routes
|
||||
if has_alpha_api:
|
||||
continue
|
||||
if not hasattr(obj, name):
|
||||
missing_methods.append((name, "missing"))
|
||||
|
|
|
@ -16,7 +16,6 @@ from pydantic import Field, TypeAdapter
|
|||
|
||||
from llama_stack.apis.common.content_types import (
|
||||
InterleavedContent,
|
||||
InterleavedContentItem,
|
||||
)
|
||||
from llama_stack.apis.common.errors import ModelNotFoundError, ModelTypeError
|
||||
from llama_stack.apis.inference import (
|
||||
|
@ -26,8 +25,6 @@ from llama_stack.apis.inference import (
|
|||
CompletionMessage,
|
||||
CompletionResponse,
|
||||
CompletionResponseStreamChunk,
|
||||
EmbeddingsResponse,
|
||||
EmbeddingTaskType,
|
||||
Inference,
|
||||
ListOpenAIChatCompletionResponse,
|
||||
LogProbConfig,
|
||||
|
@ -48,7 +45,6 @@ from llama_stack.apis.inference import (
|
|||
ResponseFormat,
|
||||
SamplingParams,
|
||||
StopReason,
|
||||
TextTruncation,
|
||||
ToolChoice,
|
||||
ToolConfig,
|
||||
ToolDefinition,
|
||||
|
@ -312,25 +308,6 @@ class InferenceRouter(Inference):
|
|||
|
||||
return response
|
||||
|
||||
async def embeddings(
|
||||
self,
|
||||
model_id: str,
|
||||
contents: list[str] | list[InterleavedContentItem],
|
||||
text_truncation: TextTruncation | None = TextTruncation.none,
|
||||
output_dimension: int | None = None,
|
||||
task_type: EmbeddingTaskType | None = None,
|
||||
) -> EmbeddingsResponse:
|
||||
logger.debug(f"InferenceRouter.embeddings: {model_id}")
|
||||
await self._get_model(model_id, ModelType.embedding)
|
||||
provider = await self.routing_table.get_provider_impl(model_id)
|
||||
return await provider.embeddings(
|
||||
model_id=model_id,
|
||||
contents=contents,
|
||||
text_truncation=text_truncation,
|
||||
output_dimension=output_dimension,
|
||||
task_type=task_type,
|
||||
)
|
||||
|
||||
async def openai_completion(
|
||||
self,
|
||||
model: str,
|
||||
|
|
|
@ -924,7 +924,7 @@ async def get_raw_document_text(document: Document) -> str:
|
|||
DeprecationWarning,
|
||||
stacklevel=2,
|
||||
)
|
||||
elif not (document.mime_type.startswith("text/") or document.mime_type == "application/yaml"):
|
||||
elif not (document.mime_type.startswith("text/") or document.mime_type in ("application/yaml", "application/json")):
|
||||
raise ValueError(f"Unexpected document mime type: {document.mime_type}")
|
||||
|
||||
if isinstance(document.content, URL):
|
||||
|
|
|
@ -52,6 +52,36 @@ from .utils import convert_chat_choice_to_response_message, is_function_tool_cal
|
|||
logger = get_logger(name=__name__, category="agents::meta_reference")
|
||||
|
||||
|
||||
def convert_tooldef_to_chat_tool(tool_def):
|
||||
"""Convert a ToolDef to OpenAI ChatCompletionToolParam format.
|
||||
|
||||
Args:
|
||||
tool_def: ToolDef from the tools API
|
||||
|
||||
Returns:
|
||||
ChatCompletionToolParam suitable for OpenAI chat completion
|
||||
"""
|
||||
|
||||
from llama_stack.models.llama.datatypes import ToolDefinition, ToolParamDefinition
|
||||
from llama_stack.providers.utils.inference.openai_compat import convert_tooldef_to_openai_tool
|
||||
|
||||
internal_tool_def = ToolDefinition(
|
||||
tool_name=tool_def.name,
|
||||
description=tool_def.description,
|
||||
parameters={
|
||||
param.name: ToolParamDefinition(
|
||||
param_type=param.parameter_type,
|
||||
description=param.description,
|
||||
required=param.required,
|
||||
default=param.default,
|
||||
items=param.items,
|
||||
)
|
||||
for param in tool_def.parameters
|
||||
},
|
||||
)
|
||||
return convert_tooldef_to_openai_tool(internal_tool_def)
|
||||
|
||||
|
||||
class StreamingResponseOrchestrator:
|
||||
def __init__(
|
||||
self,
|
||||
|
@ -580,23 +610,7 @@ class StreamingResponseOrchestrator:
|
|||
continue
|
||||
if not always_allowed or t.name in always_allowed:
|
||||
# Add to chat tools for inference
|
||||
from llama_stack.models.llama.datatypes import ToolDefinition, ToolParamDefinition
|
||||
from llama_stack.providers.utils.inference.openai_compat import convert_tooldef_to_openai_tool
|
||||
|
||||
tool_def = ToolDefinition(
|
||||
tool_name=t.name,
|
||||
description=t.description,
|
||||
parameters={
|
||||
param.name: ToolParamDefinition(
|
||||
param_type=param.parameter_type,
|
||||
description=param.description,
|
||||
required=param.required,
|
||||
default=param.default,
|
||||
)
|
||||
for param in t.parameters
|
||||
},
|
||||
)
|
||||
openai_tool = convert_tooldef_to_openai_tool(tool_def)
|
||||
openai_tool = convert_tooldef_to_chat_tool(t)
|
||||
if self.ctx.chat_tools is None:
|
||||
self.ctx.chat_tools = []
|
||||
self.ctx.chat_tools.append(openai_tool)
|
||||
|
|
|
@ -12,7 +12,7 @@ from llama_stack.apis.agents import Agents, StepType
|
|||
from llama_stack.apis.benchmarks import Benchmark
|
||||
from llama_stack.apis.datasetio import DatasetIO
|
||||
from llama_stack.apis.datasets import Datasets
|
||||
from llama_stack.apis.inference import Inference, SystemMessage, UserMessage
|
||||
from llama_stack.apis.inference import Inference, OpenAISystemMessageParam, OpenAIUserMessageParam, UserMessage
|
||||
from llama_stack.apis.scoring import Scoring
|
||||
from llama_stack.providers.datatypes import BenchmarksProtocolPrivate
|
||||
from llama_stack.providers.inline.agents.meta_reference.agent_instance import (
|
||||
|
@ -159,31 +159,40 @@ class MetaReferenceEvalImpl(
|
|||
) -> list[dict[str, Any]]:
|
||||
candidate = benchmark_config.eval_candidate
|
||||
assert candidate.sampling_params.max_tokens is not None, "SamplingParams.max_tokens must be provided"
|
||||
sampling_params = {"max_tokens": candidate.sampling_params.max_tokens}
|
||||
|
||||
generations = []
|
||||
for x in tqdm(input_rows):
|
||||
if ColumnName.completion_input.value in x:
|
||||
if candidate.sampling_params.stop:
|
||||
sampling_params["stop"] = candidate.sampling_params.stop
|
||||
|
||||
input_content = json.loads(x[ColumnName.completion_input.value])
|
||||
response = await self.inference_api.completion(
|
||||
response = await self.inference_api.openai_completion(
|
||||
model=candidate.model,
|
||||
content=input_content,
|
||||
sampling_params=candidate.sampling_params,
|
||||
prompt=input_content,
|
||||
**sampling_params,
|
||||
)
|
||||
generations.append({ColumnName.generated_answer.value: response.completion_message.content})
|
||||
generations.append({ColumnName.generated_answer.value: response.choices[0].text})
|
||||
elif ColumnName.chat_completion_input.value in x:
|
||||
chat_completion_input_json = json.loads(x[ColumnName.chat_completion_input.value])
|
||||
input_messages = [UserMessage(**x) for x in chat_completion_input_json if x["role"] == "user"]
|
||||
input_messages = [
|
||||
OpenAIUserMessageParam(**x) for x in chat_completion_input_json if x["role"] == "user"
|
||||
]
|
||||
|
||||
messages = []
|
||||
if candidate.system_message:
|
||||
messages.append(candidate.system_message)
|
||||
messages += [SystemMessage(**x) for x in chat_completion_input_json if x["role"] == "system"]
|
||||
|
||||
messages += [OpenAISystemMessageParam(**x) for x in chat_completion_input_json if x["role"] == "system"]
|
||||
|
||||
messages += input_messages
|
||||
response = await self.inference_api.chat_completion(
|
||||
model_id=candidate.model,
|
||||
response = await self.inference_api.openai_chat_completion(
|
||||
model=candidate.model,
|
||||
messages=messages,
|
||||
sampling_params=candidate.sampling_params,
|
||||
**sampling_params,
|
||||
)
|
||||
generations.append({ColumnName.generated_answer.value: response.completion_message.content})
|
||||
generations.append({ColumnName.generated_answer.value: response.choices[0].message.content})
|
||||
else:
|
||||
raise ValueError("Invalid input row")
|
||||
|
||||
|
|
|
@ -14,6 +14,7 @@ from fastapi import File, Form, Response, UploadFile
|
|||
from llama_stack.apis.common.errors import ResourceNotFoundError
|
||||
from llama_stack.apis.common.responses import Order
|
||||
from llama_stack.apis.files import (
|
||||
ExpiresAfter,
|
||||
Files,
|
||||
ListOpenAIFileResponse,
|
||||
OpenAIFileDeleteResponse,
|
||||
|
@ -86,14 +87,13 @@ class LocalfsFilesImpl(Files):
|
|||
self,
|
||||
file: Annotated[UploadFile, File()],
|
||||
purpose: Annotated[OpenAIFilePurpose, Form()],
|
||||
expires_after_anchor: Annotated[str | None, Form(alias="expires_after[anchor]")] = None,
|
||||
expires_after_seconds: Annotated[int | None, Form(alias="expires_after[seconds]")] = None,
|
||||
expires_after: Annotated[ExpiresAfter | None, Form()] = None,
|
||||
) -> OpenAIFileObject:
|
||||
"""Upload a file that can be used across various endpoints."""
|
||||
if not self.sql_store:
|
||||
raise RuntimeError("Files provider not initialized")
|
||||
|
||||
if expires_after_anchor is not None or expires_after_seconds is not None:
|
||||
if expires_after is not None:
|
||||
raise NotImplementedError("File expiration is not supported by this provider")
|
||||
|
||||
file_id = self._generate_file_id()
|
||||
|
|
|
@ -195,8 +195,7 @@ class S3FilesImpl(Files):
|
|||
self,
|
||||
file: Annotated[UploadFile, File()],
|
||||
purpose: Annotated[OpenAIFilePurpose, Form()],
|
||||
expires_after_anchor: Annotated[str | None, Form(alias="expires_after[anchor]")] = None,
|
||||
expires_after_seconds: Annotated[int | None, Form(alias="expires_after[seconds]")] = None,
|
||||
expires_after: Annotated[ExpiresAfter | None, Form()] = None,
|
||||
) -> OpenAIFileObject:
|
||||
file_id = f"file-{uuid.uuid4().hex}"
|
||||
|
||||
|
@ -204,14 +203,6 @@ class S3FilesImpl(Files):
|
|||
|
||||
created_at = self._now()
|
||||
|
||||
expires_after = None
|
||||
if expires_after_anchor is not None or expires_after_seconds is not None:
|
||||
# we use ExpiresAfter to validate input
|
||||
expires_after = ExpiresAfter(
|
||||
anchor=expires_after_anchor, # type: ignore[arg-type]
|
||||
seconds=expires_after_seconds, # type: ignore[arg-type]
|
||||
)
|
||||
|
||||
# the default is no expiration.
|
||||
# to implement no expiration we set an expiration beyond the max.
|
||||
# we'll hide this fact from users when returning the file object.
|
||||
|
|
|
@ -11,21 +11,17 @@ from botocore.client import BaseClient
|
|||
|
||||
from llama_stack.apis.common.content_types import (
|
||||
InterleavedContent,
|
||||
InterleavedContentItem,
|
||||
)
|
||||
from llama_stack.apis.inference import (
|
||||
ChatCompletionRequest,
|
||||
ChatCompletionResponse,
|
||||
ChatCompletionResponseStreamChunk,
|
||||
EmbeddingsResponse,
|
||||
EmbeddingTaskType,
|
||||
Inference,
|
||||
LogProbConfig,
|
||||
Message,
|
||||
OpenAIEmbeddingsResponse,
|
||||
ResponseFormat,
|
||||
SamplingParams,
|
||||
TextTruncation,
|
||||
ToolChoice,
|
||||
ToolConfig,
|
||||
ToolDefinition,
|
||||
|
@ -47,8 +43,6 @@ from llama_stack.providers.utils.inference.openai_compat import (
|
|||
)
|
||||
from llama_stack.providers.utils.inference.prompt_adapter import (
|
||||
chat_completion_request_to_prompt,
|
||||
content_has_media,
|
||||
interleaved_content_as_str,
|
||||
)
|
||||
|
||||
from .models import MODEL_ENTRIES
|
||||
|
@ -218,36 +212,6 @@ class BedrockInferenceAdapter(
|
|||
),
|
||||
}
|
||||
|
||||
async def embeddings(
|
||||
self,
|
||||
model_id: str,
|
||||
contents: list[str] | list[InterleavedContentItem],
|
||||
text_truncation: TextTruncation | None = TextTruncation.none,
|
||||
output_dimension: int | None = None,
|
||||
task_type: EmbeddingTaskType | None = None,
|
||||
) -> EmbeddingsResponse:
|
||||
model = await self.model_store.get_model(model_id)
|
||||
|
||||
# Convert foundation model ID to inference profile ID
|
||||
region_name = self.client.meta.region_name
|
||||
inference_profile_id = _to_inference_profile_id(model.provider_resource_id, region_name)
|
||||
|
||||
embeddings = []
|
||||
for content in contents:
|
||||
assert not content_has_media(content), "Bedrock does not support media for embeddings"
|
||||
input_text = interleaved_content_as_str(content)
|
||||
input_body = {"inputText": input_text}
|
||||
body = json.dumps(input_body)
|
||||
response = self.client.invoke_model(
|
||||
body=body,
|
||||
modelId=inference_profile_id,
|
||||
accept="application/json",
|
||||
contentType="application/json",
|
||||
)
|
||||
response_body = json.loads(response.get("body").read())
|
||||
embeddings.append(response_body.get("embedding"))
|
||||
return EmbeddingsResponse(embeddings=embeddings)
|
||||
|
||||
async def openai_embeddings(
|
||||
self,
|
||||
model: str,
|
||||
|
|
|
@ -11,21 +11,17 @@ from cerebras.cloud.sdk import AsyncCerebras
|
|||
|
||||
from llama_stack.apis.common.content_types import (
|
||||
InterleavedContent,
|
||||
InterleavedContentItem,
|
||||
)
|
||||
from llama_stack.apis.inference import (
|
||||
ChatCompletionRequest,
|
||||
CompletionRequest,
|
||||
CompletionResponse,
|
||||
EmbeddingsResponse,
|
||||
EmbeddingTaskType,
|
||||
Inference,
|
||||
LogProbConfig,
|
||||
Message,
|
||||
OpenAIEmbeddingsResponse,
|
||||
ResponseFormat,
|
||||
SamplingParams,
|
||||
TextTruncation,
|
||||
ToolChoice,
|
||||
ToolConfig,
|
||||
ToolDefinition,
|
||||
|
@ -187,16 +183,6 @@ class CerebrasInferenceAdapter(
|
|||
**get_sampling_options(request.sampling_params),
|
||||
}
|
||||
|
||||
async def embeddings(
|
||||
self,
|
||||
model_id: str,
|
||||
contents: list[str] | list[InterleavedContentItem],
|
||||
text_truncation: TextTruncation | None = TextTruncation.none,
|
||||
output_dimension: int | None = None,
|
||||
task_type: EmbeddingTaskType | None = None,
|
||||
) -> EmbeddingsResponse:
|
||||
raise NotImplementedError()
|
||||
|
||||
async def openai_embeddings(
|
||||
self,
|
||||
model: str,
|
||||
|
|
|
@ -11,15 +11,12 @@ from databricks.sdk import WorkspaceClient
|
|||
|
||||
from llama_stack.apis.common.content_types import (
|
||||
InterleavedContent,
|
||||
InterleavedContentItem,
|
||||
)
|
||||
from llama_stack.apis.inference import (
|
||||
ChatCompletionResponse,
|
||||
ChatCompletionResponseStreamChunk,
|
||||
CompletionResponse,
|
||||
CompletionResponseStreamChunk,
|
||||
EmbeddingsResponse,
|
||||
EmbeddingTaskType,
|
||||
Inference,
|
||||
LogProbConfig,
|
||||
Message,
|
||||
|
@ -27,7 +24,6 @@ from llama_stack.apis.inference import (
|
|||
OpenAICompletion,
|
||||
ResponseFormat,
|
||||
SamplingParams,
|
||||
TextTruncation,
|
||||
ToolChoice,
|
||||
ToolConfig,
|
||||
ToolDefinition,
|
||||
|
@ -118,16 +114,6 @@ class DatabricksInferenceAdapter(
|
|||
) -> ChatCompletionResponse | AsyncIterator[ChatCompletionResponseStreamChunk]:
|
||||
raise NotImplementedError()
|
||||
|
||||
async def embeddings(
|
||||
self,
|
||||
model_id: str,
|
||||
contents: list[str] | list[InterleavedContentItem],
|
||||
text_truncation: TextTruncation | None = TextTruncation.none,
|
||||
output_dimension: int | None = None,
|
||||
task_type: EmbeddingTaskType | None = None,
|
||||
) -> EmbeddingsResponse:
|
||||
raise NotImplementedError()
|
||||
|
||||
async def list_models(self) -> list[Model] | None:
|
||||
self._model_cache = {} # from OpenAIMixin
|
||||
ws_client = WorkspaceClient(host=self.config.url, token=self.get_api_key()) # TODO: this is not async
|
||||
|
|
|
@ -10,22 +10,18 @@ from fireworks.client import Fireworks
|
|||
|
||||
from llama_stack.apis.common.content_types import (
|
||||
InterleavedContent,
|
||||
InterleavedContentItem,
|
||||
)
|
||||
from llama_stack.apis.inference import (
|
||||
ChatCompletionRequest,
|
||||
ChatCompletionResponse,
|
||||
CompletionRequest,
|
||||
CompletionResponse,
|
||||
EmbeddingsResponse,
|
||||
EmbeddingTaskType,
|
||||
Inference,
|
||||
LogProbConfig,
|
||||
Message,
|
||||
ResponseFormat,
|
||||
ResponseFormatType,
|
||||
SamplingParams,
|
||||
TextTruncation,
|
||||
ToolChoice,
|
||||
ToolConfig,
|
||||
ToolDefinition,
|
||||
|
@ -48,8 +44,6 @@ from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
|
|||
from llama_stack.providers.utils.inference.prompt_adapter import (
|
||||
chat_completion_request_to_prompt,
|
||||
completion_request_to_prompt,
|
||||
content_has_media,
|
||||
interleaved_content_as_str,
|
||||
request_has_media,
|
||||
)
|
||||
|
||||
|
@ -259,28 +253,3 @@ class FireworksInferenceAdapter(OpenAIMixin, ModelRegistryHelper, Inference, Nee
|
|||
logger.debug(f"params to fireworks: {params}")
|
||||
|
||||
return params
|
||||
|
||||
async def embeddings(
|
||||
self,
|
||||
model_id: str,
|
||||
contents: list[str] | list[InterleavedContentItem],
|
||||
text_truncation: TextTruncation | None = TextTruncation.none,
|
||||
output_dimension: int | None = None,
|
||||
task_type: EmbeddingTaskType | None = None,
|
||||
) -> EmbeddingsResponse:
|
||||
model = await self.model_store.get_model(model_id)
|
||||
|
||||
kwargs = {}
|
||||
if model.metadata.get("embedding_dimension"):
|
||||
kwargs["dimensions"] = model.metadata.get("embedding_dimension")
|
||||
assert all(not content_has_media(content) for content in contents), (
|
||||
"Fireworks does not support media for embeddings"
|
||||
)
|
||||
response = self._get_client().embeddings.create(
|
||||
model=model.provider_resource_id,
|
||||
input=[interleaved_content_as_str(content) for content in contents],
|
||||
**kwargs,
|
||||
)
|
||||
|
||||
embeddings = [data.embedding for data in response.data]
|
||||
return EmbeddingsResponse(embeddings=embeddings)
|
||||
|
|
|
@ -39,25 +39,6 @@ client = LlamaStackAsLibraryClient("nvidia")
|
|||
client.initialize()
|
||||
```
|
||||
|
||||
### Create Completion
|
||||
|
||||
The following example shows how to create a completion for an NVIDIA NIM.
|
||||
|
||||
> [!NOTE]
|
||||
> The hosted NVIDIA Llama NIMs (for example ```meta-llama/Llama-3.1-8B-Instruct```) that have ```NVIDIA_BASE_URL="https://integrate.api.nvidia.com"``` do not support the ```completion``` method, while locally deployed NIMs do.
|
||||
|
||||
```python
|
||||
response = client.inference.completion(
|
||||
model_id="meta-llama/Llama-3.1-8B-Instruct",
|
||||
content="Complete the sentence using one word: Roses are red, violets are :",
|
||||
stream=False,
|
||||
sampling_params={
|
||||
"max_tokens": 50,
|
||||
},
|
||||
)
|
||||
print(f"Response: {response.content}")
|
||||
```
|
||||
|
||||
### Create Chat Completion
|
||||
|
||||
The following example shows how to create a chat completion for an NVIDIA NIM.
|
||||
|
|
|
@ -11,8 +11,6 @@ from openai import NOT_GIVEN, APIConnectionError
|
|||
|
||||
from llama_stack.apis.common.content_types import (
|
||||
InterleavedContent,
|
||||
InterleavedContentItem,
|
||||
TextContentItem,
|
||||
)
|
||||
from llama_stack.apis.inference import (
|
||||
ChatCompletionRequest,
|
||||
|
@ -21,8 +19,6 @@ from llama_stack.apis.inference import (
|
|||
CompletionRequest,
|
||||
CompletionResponse,
|
||||
CompletionResponseStreamChunk,
|
||||
EmbeddingsResponse,
|
||||
EmbeddingTaskType,
|
||||
Inference,
|
||||
LogProbConfig,
|
||||
Message,
|
||||
|
@ -31,7 +27,6 @@ from llama_stack.apis.inference import (
|
|||
OpenAIEmbeddingUsage,
|
||||
ResponseFormat,
|
||||
SamplingParams,
|
||||
TextTruncation,
|
||||
ToolChoice,
|
||||
ToolConfig,
|
||||
)
|
||||
|
@ -156,60 +151,6 @@ class NVIDIAInferenceAdapter(OpenAIMixin, Inference):
|
|||
# we pass n=1 to get only one completion
|
||||
return convert_openai_completion_choice(response.choices[0])
|
||||
|
||||
async def embeddings(
|
||||
self,
|
||||
model_id: str,
|
||||
contents: list[str] | list[InterleavedContentItem],
|
||||
text_truncation: TextTruncation | None = TextTruncation.none,
|
||||
output_dimension: int | None = None,
|
||||
task_type: EmbeddingTaskType | None = None,
|
||||
) -> EmbeddingsResponse:
|
||||
if any(content_has_media(content) for content in contents):
|
||||
raise NotImplementedError("Media is not supported")
|
||||
|
||||
#
|
||||
# Llama Stack: contents = list[str] | list[InterleavedContentItem]
|
||||
# ->
|
||||
# OpenAI: input = str | list[str]
|
||||
#
|
||||
# we can ignore str and always pass list[str] to OpenAI
|
||||
#
|
||||
flat_contents = [content.text if isinstance(content, TextContentItem) else content for content in contents]
|
||||
input = [content.text if isinstance(content, TextContentItem) else content for content in flat_contents]
|
||||
provider_model_id = await self._get_provider_model_id(model_id)
|
||||
|
||||
extra_body = {}
|
||||
|
||||
if text_truncation is not None:
|
||||
text_truncation_options = {
|
||||
TextTruncation.none: "NONE",
|
||||
TextTruncation.end: "END",
|
||||
TextTruncation.start: "START",
|
||||
}
|
||||
extra_body["truncate"] = text_truncation_options[text_truncation]
|
||||
|
||||
if output_dimension is not None:
|
||||
extra_body["dimensions"] = output_dimension
|
||||
|
||||
if task_type is not None:
|
||||
task_type_options = {
|
||||
EmbeddingTaskType.document: "passage",
|
||||
EmbeddingTaskType.query: "query",
|
||||
}
|
||||
extra_body["input_type"] = task_type_options[task_type]
|
||||
|
||||
response = await self.client.embeddings.create(
|
||||
model=provider_model_id,
|
||||
input=input,
|
||||
extra_body=extra_body,
|
||||
)
|
||||
#
|
||||
# OpenAI: CreateEmbeddingResponse(data=[Embedding(embedding=list[float], ...)], ...)
|
||||
# ->
|
||||
# Llama Stack: EmbeddingsResponse(embeddings=list[list[float]])
|
||||
#
|
||||
return EmbeddingsResponse(embeddings=[embedding.embedding for embedding in response.data])
|
||||
|
||||
async def openai_embeddings(
|
||||
self,
|
||||
model: str,
|
||||
|
|
|
@ -14,7 +14,6 @@ from ollama import AsyncClient as AsyncOllamaClient
|
|||
from llama_stack.apis.common.content_types import (
|
||||
ImageContentItem,
|
||||
InterleavedContent,
|
||||
InterleavedContentItem,
|
||||
TextContentItem,
|
||||
)
|
||||
from llama_stack.apis.common.errors import UnsupportedModelError
|
||||
|
@ -25,8 +24,6 @@ from llama_stack.apis.inference import (
|
|||
CompletionRequest,
|
||||
CompletionResponse,
|
||||
CompletionResponseStreamChunk,
|
||||
EmbeddingsResponse,
|
||||
EmbeddingTaskType,
|
||||
GrammarResponseFormat,
|
||||
InferenceProvider,
|
||||
JsonSchemaResponseFormat,
|
||||
|
@ -34,7 +31,6 @@ from llama_stack.apis.inference import (
|
|||
Message,
|
||||
ResponseFormat,
|
||||
SamplingParams,
|
||||
TextTruncation,
|
||||
ToolChoice,
|
||||
ToolConfig,
|
||||
ToolDefinition,
|
||||
|
@ -66,9 +62,7 @@ from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
|
|||
from llama_stack.providers.utils.inference.prompt_adapter import (
|
||||
chat_completion_request_to_prompt,
|
||||
completion_request_to_prompt,
|
||||
content_has_media,
|
||||
convert_image_content_to_url,
|
||||
interleaved_content_as_str,
|
||||
request_has_media,
|
||||
)
|
||||
|
||||
|
@ -363,27 +357,6 @@ class OllamaInferenceAdapter(
|
|||
async for chunk in process_chat_completion_stream_response(stream, request):
|
||||
yield chunk
|
||||
|
||||
async def embeddings(
|
||||
self,
|
||||
model_id: str,
|
||||
contents: list[str] | list[InterleavedContentItem],
|
||||
text_truncation: TextTruncation | None = TextTruncation.none,
|
||||
output_dimension: int | None = None,
|
||||
task_type: EmbeddingTaskType | None = None,
|
||||
) -> EmbeddingsResponse:
|
||||
model = await self._get_model(model_id)
|
||||
|
||||
assert all(not content_has_media(content) for content in contents), (
|
||||
"Ollama does not support media for embeddings"
|
||||
)
|
||||
response = await self.ollama_client.embed(
|
||||
model=model.provider_resource_id,
|
||||
input=[interleaved_content_as_str(content) for content in contents],
|
||||
)
|
||||
embeddings = response["embeddings"]
|
||||
|
||||
return EmbeddingsResponse(embeddings=embeddings)
|
||||
|
||||
async def register_model(self, model: Model) -> Model:
|
||||
if await self.check_model_availability(model.provider_model_id):
|
||||
return model
|
||||
|
|
|
@ -14,8 +14,6 @@ from llama_stack.apis.inference import (
|
|||
ChatCompletionResponse,
|
||||
ChatCompletionResponseStreamChunk,
|
||||
CompletionMessage,
|
||||
EmbeddingsResponse,
|
||||
EmbeddingTaskType,
|
||||
Inference,
|
||||
LogProbConfig,
|
||||
Message,
|
||||
|
@ -27,7 +25,6 @@ from llama_stack.apis.inference import (
|
|||
OpenAIResponseFormatParam,
|
||||
ResponseFormat,
|
||||
SamplingParams,
|
||||
TextTruncation,
|
||||
ToolChoice,
|
||||
ToolConfig,
|
||||
ToolDefinition,
|
||||
|
@ -190,25 +187,6 @@ class PassthroughInferenceAdapter(Inference):
|
|||
chunk = convert_to_pydantic(ChatCompletionResponseStreamChunk, chunk)
|
||||
yield chunk
|
||||
|
||||
async def embeddings(
|
||||
self,
|
||||
model_id: str,
|
||||
contents: list[InterleavedContent],
|
||||
text_truncation: TextTruncation | None = TextTruncation.none,
|
||||
output_dimension: int | None = None,
|
||||
task_type: EmbeddingTaskType | None = None,
|
||||
) -> EmbeddingsResponse:
|
||||
client = self._get_client()
|
||||
model = await self.model_store.get_model(model_id)
|
||||
|
||||
return await client.inference.embeddings(
|
||||
model_id=model.provider_resource_id,
|
||||
contents=contents,
|
||||
text_truncation=text_truncation,
|
||||
output_dimension=output_dimension,
|
||||
task_type=task_type,
|
||||
)
|
||||
|
||||
async def openai_embeddings(
|
||||
self,
|
||||
model: str,
|
||||
|
|
|
@ -136,16 +136,6 @@ class RunpodInferenceAdapter(
|
|||
**get_sampling_options(request.sampling_params),
|
||||
}
|
||||
|
||||
async def embeddings(
|
||||
self,
|
||||
model: str,
|
||||
contents: list[str] | list[InterleavedContentItem],
|
||||
text_truncation: TextTruncation | None = TextTruncation.none,
|
||||
output_dimension: int | None = None,
|
||||
task_type: EmbeddingTaskType | None = None,
|
||||
) -> EmbeddingsResponse:
|
||||
raise NotImplementedError()
|
||||
|
||||
async def openai_embeddings(
|
||||
self,
|
||||
model: str,
|
||||
|
|
|
@ -12,14 +12,11 @@ from pydantic import SecretStr
|
|||
|
||||
from llama_stack.apis.common.content_types import (
|
||||
InterleavedContent,
|
||||
InterleavedContentItem,
|
||||
)
|
||||
from llama_stack.apis.inference import (
|
||||
ChatCompletionRequest,
|
||||
ChatCompletionResponse,
|
||||
CompletionRequest,
|
||||
EmbeddingsResponse,
|
||||
EmbeddingTaskType,
|
||||
Inference,
|
||||
LogProbConfig,
|
||||
Message,
|
||||
|
@ -27,7 +24,6 @@ from llama_stack.apis.inference import (
|
|||
ResponseFormat,
|
||||
ResponseFormatType,
|
||||
SamplingParams,
|
||||
TextTruncation,
|
||||
ToolChoice,
|
||||
ToolConfig,
|
||||
ToolDefinition,
|
||||
|
@ -306,16 +302,6 @@ class _HfAdapter(
|
|||
**self._build_options(request.sampling_params, request.response_format),
|
||||
)
|
||||
|
||||
async def embeddings(
|
||||
self,
|
||||
model_id: str,
|
||||
contents: list[str] | list[InterleavedContentItem],
|
||||
text_truncation: TextTruncation | None = TextTruncation.none,
|
||||
output_dimension: int | None = None,
|
||||
task_type: EmbeddingTaskType | None = None,
|
||||
) -> EmbeddingsResponse:
|
||||
raise NotImplementedError()
|
||||
|
||||
async def openai_embeddings(
|
||||
self,
|
||||
model: str,
|
||||
|
|
|
@ -12,14 +12,11 @@ from together.constants import BASE_URL
|
|||
|
||||
from llama_stack.apis.common.content_types import (
|
||||
InterleavedContent,
|
||||
InterleavedContentItem,
|
||||
)
|
||||
from llama_stack.apis.inference import (
|
||||
ChatCompletionRequest,
|
||||
ChatCompletionResponse,
|
||||
CompletionRequest,
|
||||
EmbeddingsResponse,
|
||||
EmbeddingTaskType,
|
||||
Inference,
|
||||
LogProbConfig,
|
||||
Message,
|
||||
|
@ -27,7 +24,6 @@ from llama_stack.apis.inference import (
|
|||
ResponseFormat,
|
||||
ResponseFormatType,
|
||||
SamplingParams,
|
||||
TextTruncation,
|
||||
ToolChoice,
|
||||
ToolConfig,
|
||||
ToolDefinition,
|
||||
|
@ -50,8 +46,6 @@ from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
|
|||
from llama_stack.providers.utils.inference.prompt_adapter import (
|
||||
chat_completion_request_to_prompt,
|
||||
completion_request_to_prompt,
|
||||
content_has_media,
|
||||
interleaved_content_as_str,
|
||||
request_has_media,
|
||||
)
|
||||
|
||||
|
@ -247,26 +241,6 @@ class TogetherInferenceAdapter(OpenAIMixin, ModelRegistryHelper, Inference, Need
|
|||
logger.debug(f"params to together: {params}")
|
||||
return params
|
||||
|
||||
async def embeddings(
|
||||
self,
|
||||
model_id: str,
|
||||
contents: list[str] | list[InterleavedContentItem],
|
||||
text_truncation: TextTruncation | None = TextTruncation.none,
|
||||
output_dimension: int | None = None,
|
||||
task_type: EmbeddingTaskType | None = None,
|
||||
) -> EmbeddingsResponse:
|
||||
model = await self.model_store.get_model(model_id)
|
||||
assert all(not content_has_media(content) for content in contents), (
|
||||
"Together does not support media for embeddings"
|
||||
)
|
||||
client = self._get_client()
|
||||
r = await client.embeddings.create(
|
||||
model=model.provider_resource_id,
|
||||
input=[interleaved_content_as_str(content) for content in contents],
|
||||
)
|
||||
embeddings = [item.embedding for item in r.data]
|
||||
return EmbeddingsResponse(embeddings=embeddings)
|
||||
|
||||
async def list_models(self) -> list[Model] | None:
|
||||
self._model_cache = {}
|
||||
# Together's /v1/models is not compatible with OpenAI's /v1/models. Together support ticket #13355 -> will not fix, use Together's own client
|
||||
|
|
|
@ -16,7 +16,6 @@ from openai.types.chat.chat_completion_chunk import (
|
|||
|
||||
from llama_stack.apis.common.content_types import (
|
||||
InterleavedContent,
|
||||
InterleavedContentItem,
|
||||
TextDelta,
|
||||
ToolCallDelta,
|
||||
ToolCallParseStatus,
|
||||
|
@ -31,8 +30,6 @@ from llama_stack.apis.inference import (
|
|||
CompletionRequest,
|
||||
CompletionResponse,
|
||||
CompletionResponseStreamChunk,
|
||||
EmbeddingsResponse,
|
||||
EmbeddingTaskType,
|
||||
GrammarResponseFormat,
|
||||
Inference,
|
||||
JsonSchemaResponseFormat,
|
||||
|
@ -41,7 +38,6 @@ from llama_stack.apis.inference import (
|
|||
ModelStore,
|
||||
ResponseFormat,
|
||||
SamplingParams,
|
||||
TextTruncation,
|
||||
ToolChoice,
|
||||
ToolConfig,
|
||||
ToolDefinition,
|
||||
|
@ -74,8 +70,6 @@ from llama_stack.providers.utils.inference.openai_compat import (
|
|||
from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
|
||||
from llama_stack.providers.utils.inference.prompt_adapter import (
|
||||
completion_request_to_prompt,
|
||||
content_has_media,
|
||||
interleaved_content_as_str,
|
||||
request_has_media,
|
||||
)
|
||||
|
||||
|
@ -550,27 +544,3 @@ class VLLMInferenceAdapter(OpenAIMixin, LiteLLMOpenAIMixin, Inference, ModelsPro
|
|||
"stream": request.stream,
|
||||
**options,
|
||||
}
|
||||
|
||||
async def embeddings(
|
||||
self,
|
||||
model_id: str,
|
||||
contents: list[str] | list[InterleavedContentItem],
|
||||
text_truncation: TextTruncation | None = TextTruncation.none,
|
||||
output_dimension: int | None = None,
|
||||
task_type: EmbeddingTaskType | None = None,
|
||||
) -> EmbeddingsResponse:
|
||||
model = await self._get_model(model_id)
|
||||
|
||||
kwargs = {}
|
||||
assert model.model_type == ModelType.embedding
|
||||
assert model.metadata.get("embedding_dimension")
|
||||
kwargs["dimensions"] = model.metadata.get("embedding_dimension")
|
||||
assert all(not content_has_media(content) for content in contents), "VLLM does not support media for embeddings"
|
||||
response = await self.client.embeddings.create(
|
||||
model=model.provider_resource_id,
|
||||
input=[interleaved_content_as_str(content) for content in contents],
|
||||
**kwargs,
|
||||
)
|
||||
|
||||
embeddings = [data.embedding for data in response.data]
|
||||
return EmbeddingsResponse(embeddings=embeddings)
|
||||
|
|
|
@ -11,13 +11,11 @@ from ibm_watsonx_ai.foundation_models import Model
|
|||
from ibm_watsonx_ai.metanames import GenTextParamsMetaNames as GenParams
|
||||
from openai import AsyncOpenAI
|
||||
|
||||
from llama_stack.apis.common.content_types import InterleavedContent, InterleavedContentItem
|
||||
from llama_stack.apis.common.content_types import InterleavedContent
|
||||
from llama_stack.apis.inference import (
|
||||
ChatCompletionRequest,
|
||||
ChatCompletionResponse,
|
||||
CompletionRequest,
|
||||
EmbeddingsResponse,
|
||||
EmbeddingTaskType,
|
||||
GreedySamplingStrategy,
|
||||
Inference,
|
||||
LogProbConfig,
|
||||
|
@ -30,7 +28,6 @@ from llama_stack.apis.inference import (
|
|||
OpenAIResponseFormatParam,
|
||||
ResponseFormat,
|
||||
SamplingParams,
|
||||
TextTruncation,
|
||||
ToolChoice,
|
||||
ToolConfig,
|
||||
ToolDefinition,
|
||||
|
@ -265,16 +262,6 @@ class WatsonXInferenceAdapter(Inference, ModelRegistryHelper):
|
|||
}
|
||||
return params
|
||||
|
||||
async def embeddings(
|
||||
self,
|
||||
model_id: str,
|
||||
contents: list[str] | list[InterleavedContentItem],
|
||||
text_truncation: TextTruncation | None = TextTruncation.none,
|
||||
output_dimension: int | None = None,
|
||||
task_type: EmbeddingTaskType | None = None,
|
||||
) -> EmbeddingsResponse:
|
||||
raise NotImplementedError("embedding is not supported for watsonx")
|
||||
|
||||
async def openai_embeddings(
|
||||
self,
|
||||
model: str,
|
||||
|
|
|
@ -140,13 +140,11 @@ client.models.register(
|
|||
#### 2. Inference with the fine-tuned model
|
||||
|
||||
```python
|
||||
response = client.inference.completion(
|
||||
content="Complete the sentence using one word: Roses are red, violets are ",
|
||||
response = client.completions.create(
|
||||
prompt="Complete the sentence using one word: Roses are red, violets are ",
|
||||
stream=False,
|
||||
model_id="test-example-model@v1",
|
||||
sampling_params={
|
||||
"max_tokens": 50,
|
||||
},
|
||||
model="test-example-model@v1",
|
||||
max_tokens=50,
|
||||
)
|
||||
print(response.content)
|
||||
print(response.choices[0].text)
|
||||
```
|
||||
|
|
|
@ -15,16 +15,11 @@ if TYPE_CHECKING:
|
|||
from sentence_transformers import SentenceTransformer
|
||||
|
||||
from llama_stack.apis.inference import (
|
||||
EmbeddingsResponse,
|
||||
EmbeddingTaskType,
|
||||
InterleavedContentItem,
|
||||
ModelStore,
|
||||
OpenAIEmbeddingData,
|
||||
OpenAIEmbeddingsResponse,
|
||||
OpenAIEmbeddingUsage,
|
||||
TextTruncation,
|
||||
)
|
||||
from llama_stack.providers.utils.inference.prompt_adapter import interleaved_content_as_str
|
||||
|
||||
EMBEDDING_MODELS = {}
|
||||
|
||||
|
@ -35,23 +30,6 @@ log = get_logger(name=__name__, category="providers::utils")
|
|||
class SentenceTransformerEmbeddingMixin:
|
||||
model_store: ModelStore
|
||||
|
||||
async def embeddings(
|
||||
self,
|
||||
model_id: str,
|
||||
contents: list[str] | list[InterleavedContentItem],
|
||||
text_truncation: TextTruncation | None = TextTruncation.none,
|
||||
output_dimension: int | None = None,
|
||||
task_type: EmbeddingTaskType | None = None,
|
||||
) -> EmbeddingsResponse:
|
||||
model = await self.model_store.get_model(model_id)
|
||||
embedding_model = await self._load_sentence_transformer_model(model.provider_resource_id)
|
||||
embeddings = await asyncio.to_thread(
|
||||
embedding_model.encode,
|
||||
[interleaved_content_as_str(content) for content in contents],
|
||||
show_progress_bar=False,
|
||||
)
|
||||
return EmbeddingsResponse(embeddings=embeddings)
|
||||
|
||||
async def openai_embeddings(
|
||||
self,
|
||||
model: str,
|
||||
|
|
|
@ -11,14 +11,11 @@ import litellm
|
|||
|
||||
from llama_stack.apis.common.content_types import (
|
||||
InterleavedContent,
|
||||
InterleavedContentItem,
|
||||
)
|
||||
from llama_stack.apis.inference import (
|
||||
ChatCompletionRequest,
|
||||
ChatCompletionResponse,
|
||||
ChatCompletionResponseStreamChunk,
|
||||
EmbeddingsResponse,
|
||||
EmbeddingTaskType,
|
||||
InferenceProvider,
|
||||
JsonSchemaResponseFormat,
|
||||
LogProbConfig,
|
||||
|
@ -32,7 +29,6 @@ from llama_stack.apis.inference import (
|
|||
OpenAIResponseFormatParam,
|
||||
ResponseFormat,
|
||||
SamplingParams,
|
||||
TextTruncation,
|
||||
ToolChoice,
|
||||
ToolConfig,
|
||||
ToolDefinition,
|
||||
|
@ -50,9 +46,6 @@ from llama_stack.providers.utils.inference.openai_compat import (
|
|||
get_sampling_options,
|
||||
prepare_openai_completion_params,
|
||||
)
|
||||
from llama_stack.providers.utils.inference.prompt_adapter import (
|
||||
interleaved_content_as_str,
|
||||
)
|
||||
|
||||
logger = get_logger(name=__name__, category="providers::utils")
|
||||
|
||||
|
@ -269,24 +262,6 @@ class LiteLLMOpenAIMixin(
|
|||
)
|
||||
return api_key
|
||||
|
||||
async def embeddings(
|
||||
self,
|
||||
model_id: str,
|
||||
contents: list[str] | list[InterleavedContentItem],
|
||||
text_truncation: TextTruncation | None = TextTruncation.none,
|
||||
output_dimension: int | None = None,
|
||||
task_type: EmbeddingTaskType | None = None,
|
||||
) -> EmbeddingsResponse:
|
||||
model = await self.model_store.get_model(model_id)
|
||||
|
||||
response = litellm.embedding(
|
||||
model=self.get_litellm_model_name(model.provider_resource_id),
|
||||
input=[interleaved_content_as_str(content) for content in contents],
|
||||
)
|
||||
|
||||
embeddings = [data["embedding"] for data in response["data"]]
|
||||
return EmbeddingsResponse(embeddings=embeddings)
|
||||
|
||||
async def openai_embeddings(
|
||||
self,
|
||||
model: str,
|
||||
|
@ -399,6 +374,14 @@ class LiteLLMOpenAIMixin(
|
|||
top_p: float | None = None,
|
||||
user: str | None = None,
|
||||
) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]:
|
||||
# Add usage tracking for streaming when telemetry is active
|
||||
from llama_stack.providers.utils.telemetry.tracing import get_current_span
|
||||
|
||||
if stream and get_current_span() is not None:
|
||||
if stream_options is None:
|
||||
stream_options = {"include_usage": True}
|
||||
elif "include_usage" not in stream_options:
|
||||
stream_options = {**stream_options, "include_usage": True}
|
||||
model_obj = await self.model_store.get_model(model)
|
||||
params = await prepare_openai_completion_params(
|
||||
model=self.get_litellm_model_name(model_obj.provider_resource_id),
|
||||
|
|
|
@ -3,6 +3,9 @@
|
|||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
import asyncio
|
||||
from typing import Any
|
||||
|
||||
from llama_stack.apis.agents import (
|
||||
Order,
|
||||
)
|
||||
|
@ -14,24 +17,51 @@ from llama_stack.apis.agents.openai_responses import (
|
|||
OpenAIResponseObject,
|
||||
OpenAIResponseObjectWithInput,
|
||||
)
|
||||
from llama_stack.core.datatypes import AccessRule
|
||||
from llama_stack.core.datatypes import AccessRule, ResponsesStoreConfig
|
||||
from llama_stack.core.utils.config_dirs import RUNTIME_BASE_DIR
|
||||
from llama_stack.log import get_logger
|
||||
|
||||
from ..sqlstore.api import ColumnDefinition, ColumnType
|
||||
from ..sqlstore.authorized_sqlstore import AuthorizedSqlStore
|
||||
from ..sqlstore.sqlstore import SqliteSqlStoreConfig, SqlStoreConfig, sqlstore_impl
|
||||
from ..sqlstore.sqlstore import SqliteSqlStoreConfig, SqlStoreConfig, SqlStoreType, sqlstore_impl
|
||||
|
||||
logger = get_logger(name=__name__, category="responses_store")
|
||||
|
||||
|
||||
class ResponsesStore:
|
||||
def __init__(self, sql_store_config: SqlStoreConfig, policy: list[AccessRule]):
|
||||
if not sql_store_config:
|
||||
sql_store_config = SqliteSqlStoreConfig(
|
||||
def __init__(
|
||||
self,
|
||||
config: ResponsesStoreConfig | SqlStoreConfig,
|
||||
policy: list[AccessRule],
|
||||
):
|
||||
# Handle backward compatibility
|
||||
if not isinstance(config, ResponsesStoreConfig):
|
||||
# Legacy: SqlStoreConfig passed directly as config
|
||||
config = ResponsesStoreConfig(
|
||||
sql_store_config=config,
|
||||
)
|
||||
|
||||
self.config = config
|
||||
self.sql_store_config = config.sql_store_config
|
||||
if not self.sql_store_config:
|
||||
self.sql_store_config = SqliteSqlStoreConfig(
|
||||
db_path=(RUNTIME_BASE_DIR / "sqlstore.db").as_posix(),
|
||||
)
|
||||
self.sql_store = AuthorizedSqlStore(sqlstore_impl(sql_store_config), policy)
|
||||
self.sql_store = None
|
||||
self.policy = policy
|
||||
|
||||
# Disable write queue for SQLite to avoid concurrency issues
|
||||
self.enable_write_queue = self.sql_store_config.type != SqlStoreType.sqlite
|
||||
|
||||
# Async write queue and worker control
|
||||
self._queue: asyncio.Queue[tuple[OpenAIResponseObject, list[OpenAIResponseInput]]] | None = None
|
||||
self._worker_tasks: list[asyncio.Task[Any]] = []
|
||||
self._max_write_queue_size: int = config.max_write_queue_size
|
||||
self._num_writers: int = max(1, config.num_writers)
|
||||
|
||||
async def initialize(self):
|
||||
"""Create the necessary tables if they don't exist."""
|
||||
self.sql_store = AuthorizedSqlStore(sqlstore_impl(self.sql_store_config), self.policy)
|
||||
await self.sql_store.create_table(
|
||||
"openai_responses",
|
||||
{
|
||||
|
@ -42,9 +72,68 @@ class ResponsesStore:
|
|||
},
|
||||
)
|
||||
|
||||
if self.enable_write_queue:
|
||||
self._queue = asyncio.Queue(maxsize=self._max_write_queue_size)
|
||||
for _ in range(self._num_writers):
|
||||
self._worker_tasks.append(asyncio.create_task(self._worker_loop()))
|
||||
else:
|
||||
logger.info("Write queue disabled for SQLite to avoid concurrency issues")
|
||||
|
||||
async def shutdown(self) -> None:
|
||||
if not self._worker_tasks:
|
||||
return
|
||||
if self._queue is not None:
|
||||
await self._queue.join()
|
||||
for t in self._worker_tasks:
|
||||
if not t.done():
|
||||
t.cancel()
|
||||
for t in self._worker_tasks:
|
||||
try:
|
||||
await t
|
||||
except asyncio.CancelledError:
|
||||
pass
|
||||
self._worker_tasks.clear()
|
||||
|
||||
async def flush(self) -> None:
|
||||
"""Wait for all queued writes to complete. Useful for testing."""
|
||||
if self.enable_write_queue and self._queue is not None:
|
||||
await self._queue.join()
|
||||
|
||||
async def store_response_object(
|
||||
self, response_object: OpenAIResponseObject, input: list[OpenAIResponseInput]
|
||||
) -> None:
|
||||
if self.enable_write_queue:
|
||||
if self._queue is None:
|
||||
raise ValueError("Responses store is not initialized")
|
||||
try:
|
||||
self._queue.put_nowait((response_object, input))
|
||||
except asyncio.QueueFull:
|
||||
logger.warning(f"Write queue full; adding response id={getattr(response_object, 'id', '<unknown>')}")
|
||||
await self._queue.put((response_object, input))
|
||||
else:
|
||||
await self._write_response_object(response_object, input)
|
||||
|
||||
async def _worker_loop(self) -> None:
|
||||
assert self._queue is not None
|
||||
while True:
|
||||
try:
|
||||
item = await self._queue.get()
|
||||
except asyncio.CancelledError:
|
||||
break
|
||||
response_object, input = item
|
||||
try:
|
||||
await self._write_response_object(response_object, input)
|
||||
except Exception as e: # noqa: BLE001
|
||||
logger.error(f"Error writing response object: {e}")
|
||||
finally:
|
||||
self._queue.task_done()
|
||||
|
||||
async def _write_response_object(
|
||||
self, response_object: OpenAIResponseObject, input: list[OpenAIResponseInput]
|
||||
) -> None:
|
||||
if self.sql_store is None:
|
||||
raise ValueError("Responses store is not initialized")
|
||||
|
||||
data = response_object.model_dump()
|
||||
data["input"] = [input_item.model_dump() for input_item in input]
|
||||
|
||||
|
@ -73,6 +162,9 @@ class ResponsesStore:
|
|||
:param model: The model to filter by.
|
||||
:param order: The order to sort the responses by.
|
||||
"""
|
||||
if not self.sql_store:
|
||||
raise ValueError("Responses store is not initialized")
|
||||
|
||||
if not order:
|
||||
order = Order.desc
|
||||
|
||||
|
@ -100,6 +192,9 @@ class ResponsesStore:
|
|||
"""
|
||||
Get a response object with automatic access control checking.
|
||||
"""
|
||||
if not self.sql_store:
|
||||
raise ValueError("Responses store is not initialized")
|
||||
|
||||
row = await self.sql_store.fetch_one(
|
||||
"openai_responses",
|
||||
where={"id": response_id},
|
||||
|
@ -113,6 +208,9 @@ class ResponsesStore:
|
|||
return OpenAIResponseObjectWithInput(**row["response_object"])
|
||||
|
||||
async def delete_response_object(self, response_id: str) -> OpenAIDeleteResponseObject:
|
||||
if not self.sql_store:
|
||||
raise ValueError("Responses store is not initialized")
|
||||
|
||||
row = await self.sql_store.fetch_one("openai_responses", where={"id": response_id})
|
||||
if not row:
|
||||
raise ValueError(f"Response with id {response_id} not found")
|
||||
|
|
|
@ -22,7 +22,6 @@ class WebMethod:
|
|||
raw_bytes_request_body: bool | None = False
|
||||
# A descriptive name of the corresponding span created by tracing
|
||||
descriptive_name: str | None = None
|
||||
experimental: bool | None = False
|
||||
required_scope: str | None = None
|
||||
deprecated: bool | None = False
|
||||
|
||||
|
@ -39,7 +38,6 @@ def webmethod(
|
|||
response_examples: list[Any] | None = None,
|
||||
raw_bytes_request_body: bool | None = False,
|
||||
descriptive_name: str | None = None,
|
||||
experimental: bool | None = False,
|
||||
required_scope: str | None = None,
|
||||
deprecated: bool | None = False,
|
||||
) -> Callable[[T], T]:
|
||||
|
@ -50,7 +48,6 @@ def webmethod(
|
|||
:param public: True if the operation can be invoked without prior authentication.
|
||||
:param request_examples: Sample requests that the operation might take. Pass a list of objects, not JSON.
|
||||
:param response_examples: Sample responses that the operation might produce. Pass a list of objects, not JSON.
|
||||
:param experimental: True if the operation is experimental and subject to change.
|
||||
:param required_scope: Required scope for this endpoint (e.g., 'monitoring.viewer').
|
||||
"""
|
||||
|
||||
|
@ -64,7 +61,6 @@ def webmethod(
|
|||
response_examples=response_examples,
|
||||
raw_bytes_request_body=raw_bytes_request_body,
|
||||
descriptive_name=descriptive_name,
|
||||
experimental=experimental,
|
||||
required_scope=required_scope,
|
||||
deprecated=deprecated,
|
||||
)
|
||||
|
|
|
@ -567,6 +567,22 @@ def get_class_properties(typ: type) -> Iterable[Tuple[str, type | str]]:
|
|||
|
||||
if is_dataclass_type(typ):
|
||||
return ((field.name, field.type) for field in dataclasses.fields(typ))
|
||||
elif hasattr(typ, "model_fields"):
|
||||
# Pydantic BaseModel - use model_fields to exclude ClassVar and other non-field attributes
|
||||
# Reconstruct Annotated type if discriminator exists to preserve metadata
|
||||
from typing import Annotated, Any, cast
|
||||
from pydantic.fields import FieldInfo
|
||||
|
||||
def get_field_type(name: str, field: Any) -> type | str:
|
||||
# If field has discriminator, wrap in Annotated to preserve it for schema generation
|
||||
if field.discriminator:
|
||||
field_info = FieldInfo(annotation=None, discriminator=field.discriminator)
|
||||
# Annotated returns _AnnotatedAlias which isn't a type but is valid here
|
||||
return Annotated[field.annotation, field_info] # type: ignore[return-value]
|
||||
# field.annotation can be Union types, Annotated, etc. which aren't type but are valid
|
||||
return field.annotation # type: ignore[return-value,no-any-return]
|
||||
|
||||
return ((name, get_field_type(name, field)) for name, field in typ.model_fields.items())
|
||||
else:
|
||||
resolved_hints = get_resolved_hints(typ)
|
||||
return resolved_hints.items()
|
||||
|
|
|
@ -92,7 +92,12 @@ def get_class_property_docstrings(
|
|||
:returns: A dictionary mapping property names to descriptions.
|
||||
"""
|
||||
|
||||
result = {}
|
||||
result: Dict[str, str] = {}
|
||||
# Only try to get MRO if data_type is actually a class
|
||||
# Special types like Literal, Union, etc. don't have MRO
|
||||
if not inspect.isclass(data_type):
|
||||
return result
|
||||
|
||||
for base in inspect.getmro(data_type):
|
||||
docstr = docstring.parse_type(base)
|
||||
for param in docstr.params.values():
|
||||
|
|
|
@ -41,7 +41,6 @@ export default function ResponseDetailPage() {
|
|||
temperature: responseData.temperature,
|
||||
top_p: responseData.top_p,
|
||||
truncation: responseData.truncation,
|
||||
user: responseData.user,
|
||||
};
|
||||
};
|
||||
|
||||
|
|
|
@ -43,7 +43,6 @@ const convertResponseListData = (
|
|||
temperature: responseData.temperature,
|
||||
top_p: responseData.top_p,
|
||||
truncation: responseData.truncation,
|
||||
user: responseData.user,
|
||||
};
|
||||
};
|
||||
|
||||
|
|
|
@ -178,10 +178,10 @@ Note that when re-recording tests, you must use a Stack pointing to a server (i.
|
|||
|
||||
### Basic Test Pattern
|
||||
```python
|
||||
def test_basic_completion(llama_stack_client, text_model_id):
|
||||
response = llama_stack_client.inference.completion(
|
||||
def test_basic_chat_completion(llama_stack_client, text_model_id):
|
||||
response = llama_stack_client.inference.chat_completion(
|
||||
model_id=text_model_id,
|
||||
content=CompletionMessage(role="user", content="Hello"),
|
||||
messages=[{"role": "user", "content": "Hello"}],
|
||||
)
|
||||
|
||||
# Test structure, not AI output quality
|
||||
|
|
|
@ -166,7 +166,7 @@ def model_providers(llama_stack_client):
|
|||
|
||||
@pytest.fixture(autouse=True)
|
||||
def skip_if_no_model(request):
|
||||
model_fixtures = ["text_model_id", "vision_model_id", "embedding_model_id", "judge_model_id"]
|
||||
model_fixtures = ["text_model_id", "vision_model_id", "embedding_model_id", "judge_model_id", "shield_id"]
|
||||
test_func = request.node.function
|
||||
|
||||
actual_params = inspect.signature(test_func).parameters.keys()
|
||||
|
@ -274,7 +274,7 @@ def require_server(llama_stack_client):
|
|||
|
||||
@pytest.fixture(scope="session")
|
||||
def openai_client(llama_stack_client, require_server):
|
||||
base_url = f"{llama_stack_client.base_url}/v1/openai/v1"
|
||||
base_url = f"{llama_stack_client.base_url}/v1"
|
||||
return OpenAI(base_url=base_url, api_key="fake")
|
||||
|
||||
|
||||
|
|
|
@ -87,7 +87,7 @@ def skip_if_model_doesnt_support_openai_embeddings(client, model_id):
|
|||
|
||||
@pytest.fixture
|
||||
def openai_client(client_with_models):
|
||||
base_url = f"{client_with_models.base_url}/v1/openai/v1"
|
||||
base_url = f"{client_with_models.base_url}/v1"
|
||||
return OpenAI(base_url=base_url, api_key="fake")
|
||||
|
||||
|
||||
|
|
56
tests/integration/recordings/responses/8d035e153b6f.json
Normal file
|
@ -0,0 +1,56 @@
|
|||
{
|
||||
"request": {
|
||||
"method": "POST",
|
||||
"url": "http://0.0.0.0:11434/v1/v1/chat/completions",
|
||||
"headers": {},
|
||||
"body": {
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"messages": [
|
||||
{
|
||||
"role": "user",
|
||||
"content": "Who is the CEO of Meta?"
|
||||
}
|
||||
],
|
||||
"max_tokens": 0
|
||||
},
|
||||
"endpoint": "/v1/chat/completions",
|
||||
"model": "llama3.2:3b-instruct-fp16"
|
||||
},
|
||||
"response": {
|
||||
"body": {
|
||||
"__type__": "openai.types.chat.chat_completion.ChatCompletion",
|
||||
"__data__": {
|
||||
"id": "chatcmpl-708",
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": "stop",
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"message": {
|
||||
"content": "Mark Zuckerberg is the founder, chairman and CEO of Meta, which he originally founded as Facebook in 2004.",
|
||||
"refusal": null,
|
||||
"role": "assistant",
|
||||
"annotations": null,
|
||||
"audio": null,
|
||||
"function_call": null,
|
||||
"tool_calls": null
|
||||
}
|
||||
}
|
||||
],
|
||||
"created": 1759012142,
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"object": "chat.completion",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": "fp_ollama",
|
||||
"usage": {
|
||||
"completion_tokens": 24,
|
||||
"prompt_tokens": 32,
|
||||
"total_tokens": 56,
|
||||
"completion_tokens_details": null,
|
||||
"prompt_tokens_details": null
|
||||
}
|
||||
}
|
||||
},
|
||||
"is_streaming": false
|
||||
}
|
||||
}
|
56
tests/integration/recordings/responses/92a9a916ef02.json
Normal file
|
@ -0,0 +1,56 @@
|
|||
{
|
||||
"request": {
|
||||
"method": "POST",
|
||||
"url": "http://0.0.0.0:11434/v1/v1/chat/completions",
|
||||
"headers": {},
|
||||
"body": {
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"messages": [
|
||||
{
|
||||
"role": "user",
|
||||
"content": "What is the currency of Japan?"
|
||||
}
|
||||
],
|
||||
"max_tokens": 0
|
||||
},
|
||||
"endpoint": "/v1/chat/completions",
|
||||
"model": "llama3.2:3b-instruct-fp16"
|
||||
},
|
||||
"response": {
|
||||
"body": {
|
||||
"__type__": "openai.types.chat.chat_completion.ChatCompletion",
|
||||
"__data__": {
|
||||
"id": "chatcmpl-343",
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": "stop",
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"message": {
|
||||
"content": "The currency of Japan is the Japanese yen (, ry\u014d) and its symbol, \u00a5.",
|
||||
"refusal": null,
|
||||
"role": "assistant",
|
||||
"annotations": null,
|
||||
"audio": null,
|
||||
"function_call": null,
|
||||
"tool_calls": null
|
||||
}
|
||||
}
|
||||
],
|
||||
"created": 1759012146,
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"object": "chat.completion",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": "fp_ollama",
|
||||
"usage": {
|
||||
"completion_tokens": 20,
|
||||
"prompt_tokens": 32,
|
||||
"total_tokens": 52,
|
||||
"completion_tokens_details": null,
|
||||
"prompt_tokens_details": null
|
||||
}
|
||||
}
|
||||
},
|
||||
"is_streaming": false
|
||||
}
|
||||
}
|
56
tests/integration/recordings/responses/c62eb5d7115e.json
Normal file
|
@ -0,0 +1,56 @@
|
|||
{
|
||||
"request": {
|
||||
"method": "POST",
|
||||
"url": "http://0.0.0.0:11434/v1/v1/chat/completions",
|
||||
"headers": {},
|
||||
"body": {
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"messages": [
|
||||
{
|
||||
"role": "user",
|
||||
"content": "What is the smallest country in the world?"
|
||||
}
|
||||
],
|
||||
"max_tokens": 0
|
||||
},
|
||||
"endpoint": "/v1/chat/completions",
|
||||
"model": "llama3.2:3b-instruct-fp16"
|
||||
},
|
||||
"response": {
|
||||
"body": {
|
||||
"__type__": "openai.types.chat.chat_completion.ChatCompletion",
|
||||
"__data__": {
|
||||
"id": "chatcmpl-842",
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": "stop",
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"message": {
|
||||
"content": "The smallest country in the world is the Vatican City, an independent city-state located within Rome, Italy. It has a total area of approximately 0.44 km\u00b2 (0.17 sq mi) and a population of around 800 people.\n\nDespite its tiny size, the Vatican City is a sovereign state with its own government, currency, postal system, and even a small army (the Gendarmeria Romana). It's also home to numerous iconic landmarks, including St. Peter's Basilica, the Sistine Chapel, and the Vatican Museums.\n\nThe Vatican City is so small that it can fit entirely within an average American city park!",
|
||||
"refusal": null,
|
||||
"role": "assistant",
|
||||
"annotations": null,
|
||||
"audio": null,
|
||||
"function_call": null,
|
||||
"tool_calls": null
|
||||
}
|
||||
}
|
||||
],
|
||||
"created": 1759012145,
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"object": "chat.completion",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": "fp_ollama",
|
||||
"usage": {
|
||||
"completion_tokens": 133,
|
||||
"prompt_tokens": 34,
|
||||
"total_tokens": 167,
|
||||
"completion_tokens_details": null,
|
||||
"prompt_tokens_details": null
|
||||
}
|
||||
}
|
||||
},
|
||||
"is_streaming": false
|
||||
}
|
||||
}
|
56
tests/integration/recordings/responses/e25ab43491af.json
Normal file
|
@ -0,0 +1,56 @@
|
|||
{
|
||||
"request": {
|
||||
"method": "POST",
|
||||
"url": "http://0.0.0.0:11434/v1/v1/chat/completions",
|
||||
"headers": {},
|
||||
"body": {
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"messages": [
|
||||
{
|
||||
"role": "user",
|
||||
"content": "What is the capital of France?"
|
||||
}
|
||||
],
|
||||
"max_tokens": 0
|
||||
},
|
||||
"endpoint": "/v1/chat/completions",
|
||||
"model": "llama3.2:3b-instruct-fp16"
|
||||
},
|
||||
"response": {
|
||||
"body": {
|
||||
"__type__": "openai.types.chat.chat_completion.ChatCompletion",
|
||||
"__data__": {
|
||||
"id": "chatcmpl-808",
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": "stop",
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"message": {
|
||||
"content": "The capital of France is Paris.",
|
||||
"refusal": null,
|
||||
"role": "assistant",
|
||||
"annotations": null,
|
||||
"audio": null,
|
||||
"function_call": null,
|
||||
"tool_calls": null
|
||||
}
|
||||
}
|
||||
],
|
||||
"created": 1759012142,
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"object": "chat.completion",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": "fp_ollama",
|
||||
"usage": {
|
||||
"completion_tokens": 8,
|
||||
"prompt_tokens": 32,
|
||||
"total_tokens": 40,
|
||||
"completion_tokens_details": null,
|
||||
"prompt_tokens_details": null
|
||||
}
|
||||
}
|
||||
},
|
||||
"is_streaming": false
|
||||
}
|
||||
}
|
56
tests/integration/recordings/responses/f28a44c97ea7.json
Normal file
|
@ -0,0 +1,56 @@
|
|||
{
|
||||
"request": {
|
||||
"method": "POST",
|
||||
"url": "http://0.0.0.0:11434/v1/v1/chat/completions",
|
||||
"headers": {},
|
||||
"body": {
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"messages": [
|
||||
{
|
||||
"role": "user",
|
||||
"content": "What is the largest planet in our solar system?"
|
||||
}
|
||||
],
|
||||
"max_tokens": 0
|
||||
},
|
||||
"endpoint": "/v1/chat/completions",
|
||||
"model": "llama3.2:3b-instruct-fp16"
|
||||
},
|
||||
"response": {
|
||||
"body": {
|
||||
"__type__": "openai.types.chat.chat_completion.ChatCompletion",
|
||||
"__data__": {
|
||||
"id": "chatcmpl-282",
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": "stop",
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"message": {
|
||||
"content": "The largest planet in our solar system is Jupiter. It is a gas giant, with a diameter of approximately 142,984 kilometers (88,846 miles). This makes it more than 11 times the diameter of the Earth and more than 2.5 times the mass of all the other planets in our solar system combined.",
|
||||
"refusal": null,
|
||||
"role": "assistant",
|
||||
"annotations": null,
|
||||
"audio": null,
|
||||
"function_call": null,
|
||||
"tool_calls": null
|
||||
}
|
||||
}
|
||||
],
|
||||
"created": 1759012143,
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"object": "chat.completion",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": "fp_ollama",
|
||||
"usage": {
|
||||
"completion_tokens": 67,
|
||||
"prompt_tokens": 35,
|
||||
"total_tokens": 102,
|
||||
"completion_tokens_details": null,
|
||||
"prompt_tokens_details": null
|
||||
}
|
||||
}
|
||||
},
|
||||
"is_streaming": false
|
||||
}
|
||||
}
|
|
@ -107,14 +107,34 @@ async def test_get_raw_document_text_deprecated_text_yaml_with_text_content_item
|
|||
assert "text/yaml" in str(w[0].message)
|
||||
|
||||
|
||||
async def test_get_raw_document_text_supports_json_mime_type():
|
||||
"""Test that the function accepts application/json mime type."""
|
||||
json_content = '{"name": "test", "version": "1.0", "items": ["item1", "item2"]}'
|
||||
|
||||
document = Document(content=json_content, mime_type="application/json")
|
||||
|
||||
result = await get_raw_document_text(document)
|
||||
assert result == json_content
|
||||
|
||||
|
||||
async def test_get_raw_document_text_with_json_text_content_item():
|
||||
"""Test that the function handles JSON TextContentItem correctly."""
|
||||
json_content = '{"key": "value", "nested": {"array": [1, 2, 3]}}'
|
||||
|
||||
document = Document(content=TextContentItem(text=json_content), mime_type="application/json")
|
||||
|
||||
result = await get_raw_document_text(document)
|
||||
assert result == json_content
|
||||
|
||||
|
||||
async def test_get_raw_document_text_rejects_unsupported_mime_types():
|
||||
"""Test that the function rejects unsupported mime types."""
|
||||
document = Document(
|
||||
content="Some content",
|
||||
mime_type="application/json", # Not supported
|
||||
mime_type="application/pdf", # Not supported
|
||||
)
|
||||
|
||||
with pytest.raises(ValueError, match="Unexpected document mime type: application/json"):
|
||||
with pytest.raises(ValueError, match="Unexpected document mime type: application/pdf"):
|
||||
await get_raw_document_text(document)
|
||||
|
||||
|
||||
|
|
|
@ -42,10 +42,12 @@ from llama_stack.apis.inference import (
|
|||
)
|
||||
from llama_stack.apis.tools.tools import Tool, ToolGroups, ToolInvocationResult, ToolParameter, ToolRuntime
|
||||
from llama_stack.core.access_control.access_control import default_policy
|
||||
from llama_stack.core.datatypes import ResponsesStoreConfig
|
||||
from llama_stack.providers.inline.agents.meta_reference.responses.openai_responses import (
|
||||
OpenAIResponsesImpl,
|
||||
)
|
||||
from llama_stack.providers.utils.responses.responses_store import ResponsesStore
|
||||
from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig
|
||||
from tests.unit.providers.agents.meta_reference.fixtures import load_chat_completion_fixture
|
||||
|
||||
|
||||
|
@ -677,7 +679,9 @@ async def test_responses_store_list_input_items_logic():
|
|||
|
||||
# Create mock store and response store
|
||||
mock_sql_store = AsyncMock()
|
||||
responses_store = ResponsesStore(sql_store_config=None, policy=default_policy())
|
||||
responses_store = ResponsesStore(
|
||||
ResponsesStoreConfig(sql_store_config=SqliteSqlStoreConfig(db_path="mock_db_path")), policy=default_policy()
|
||||
)
|
||||
responses_store.sql_store = mock_sql_store
|
||||
|
||||
# Setup test data - multiple input items
|
||||
|
|
|
@ -228,12 +228,13 @@ class TestS3FilesImpl:
|
|||
|
||||
mock_now.return_value = 0
|
||||
|
||||
from llama_stack.apis.files import ExpiresAfter
|
||||
|
||||
sample_text_file.filename = "test_expired_file"
|
||||
uploaded = await s3_provider.openai_upload_file(
|
||||
file=sample_text_file,
|
||||
purpose=OpenAIFilePurpose.ASSISTANTS,
|
||||
expires_after_anchor="created_at",
|
||||
expires_after_seconds=two_hours,
|
||||
expires_after=ExpiresAfter(anchor="created_at", seconds=two_hours),
|
||||
)
|
||||
|
||||
mock_now.return_value = two_hours * 2 # fast forward 4 hours
|
||||
|
@ -259,42 +260,44 @@ class TestS3FilesImpl:
|
|||
|
||||
async def test_unsupported_expires_after_anchor(self, s3_provider, sample_text_file):
|
||||
"""Unsupported anchor value should raise ValueError."""
|
||||
from llama_stack.apis.files import ExpiresAfter
|
||||
|
||||
sample_text_file.filename = "test_unsupported_expires_after_anchor"
|
||||
|
||||
with pytest.raises(ValueError, match="Input should be 'created_at'"):
|
||||
await s3_provider.openai_upload_file(
|
||||
file=sample_text_file,
|
||||
purpose=OpenAIFilePurpose.ASSISTANTS,
|
||||
expires_after_anchor="now",
|
||||
expires_after_seconds=3600,
|
||||
expires_after=ExpiresAfter(anchor="now", seconds=3600), # type: ignore
|
||||
)
|
||||
|
||||
async def test_nonint_expires_after_seconds(self, s3_provider, sample_text_file):
|
||||
"""Non-integer seconds in expires_after should raise ValueError."""
|
||||
from llama_stack.apis.files import ExpiresAfter
|
||||
|
||||
sample_text_file.filename = "test_nonint_expires_after_seconds"
|
||||
|
||||
with pytest.raises(ValueError, match="should be a valid integer"):
|
||||
await s3_provider.openai_upload_file(
|
||||
file=sample_text_file,
|
||||
purpose=OpenAIFilePurpose.ASSISTANTS,
|
||||
expires_after_anchor="created_at",
|
||||
expires_after_seconds="many",
|
||||
expires_after=ExpiresAfter(anchor="created_at", seconds="many"), # type: ignore
|
||||
)
|
||||
|
||||
async def test_expires_after_seconds_out_of_bounds(self, s3_provider, sample_text_file):
|
||||
"""Seconds outside allowed range should raise ValueError."""
|
||||
from llama_stack.apis.files import ExpiresAfter
|
||||
|
||||
with pytest.raises(ValueError, match="greater than or equal to 3600"):
|
||||
await s3_provider.openai_upload_file(
|
||||
file=sample_text_file,
|
||||
purpose=OpenAIFilePurpose.ASSISTANTS,
|
||||
expires_after_anchor="created_at",
|
||||
expires_after_seconds=3599,
|
||||
expires_after=ExpiresAfter(anchor="created_at", seconds=3599),
|
||||
)
|
||||
|
||||
with pytest.raises(ValueError, match="less than or equal to 2592000"):
|
||||
await s3_provider.openai_upload_file(
|
||||
file=sample_text_file,
|
||||
purpose=OpenAIFilePurpose.ASSISTANTS,
|
||||
expires_after_anchor="created_at",
|
||||
expires_after_seconds=2592001,
|
||||
expires_after=ExpiresAfter(anchor="created_at", seconds=2592001),
|
||||
)
|
||||
|
|
5
tests/unit/providers/inline/__init__.py
Normal file
|
@ -0,0 +1,5 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
5
tests/unit/providers/inline/agents/__init__.py
Normal file
|
@ -0,0 +1,5 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
|
@ -0,0 +1,5 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
|
@ -0,0 +1,5 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
|
@ -0,0 +1,42 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from llama_stack.apis.tools import ToolDef, ToolParameter
|
||||
from llama_stack.providers.inline.agents.meta_reference.responses.streaming import (
|
||||
convert_tooldef_to_chat_tool,
|
||||
)
|
||||
|
||||
|
||||
def test_convert_tooldef_to_chat_tool_preserves_items_field():
|
||||
"""Test that array parameters preserve the items field during conversion.
|
||||
|
||||
This test ensures that when converting ToolDef with array-type parameters
|
||||
to OpenAI ChatCompletionToolParam format, the 'items' field is preserved.
|
||||
Without this fix, array parameters would be missing schema information about their items.
|
||||
"""
|
||||
tool_def = ToolDef(
|
||||
name="test_tool",
|
||||
description="A test tool with array parameter",
|
||||
parameters=[
|
||||
ToolParameter(
|
||||
name="tags",
|
||||
parameter_type="array",
|
||||
description="List of tags",
|
||||
required=True,
|
||||
items={"type": "string"},
|
||||
)
|
||||
],
|
||||
)
|
||||
|
||||
result = convert_tooldef_to_chat_tool(tool_def)
|
||||
|
||||
assert result["type"] == "function"
|
||||
assert result["function"]["name"] == "test_tool"
|
||||
|
||||
tags_param = result["function"]["parameters"]["properties"]["tags"]
|
||||
assert tags_param["type"] == "array"
|
||||
assert "items" in tags_param, "items field should be preserved for array parameters"
|
||||
assert tags_param["items"] == {"type": "string"}
|
|
@ -5,13 +5,12 @@
|
|||
# the root directory of this source tree.
|
||||
|
||||
import asyncio
|
||||
from unittest.mock import AsyncMock, MagicMock, patch
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from llama_stack.apis.files import Files
|
||||
from llama_stack.apis.inference import EmbeddingsResponse, Inference
|
||||
from llama_stack.apis.vector_dbs import VectorDB
|
||||
from llama_stack.apis.vector_io import Chunk, QueryChunksResponse
|
||||
from llama_stack.providers.datatypes import HealthStatus
|
||||
|
@ -70,13 +69,6 @@ def mock_vector_db(vector_db_id, embedding_dimension) -> MagicMock:
|
|||
return mock_vector_db
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def mock_inference_api(sample_embeddings):
|
||||
mock_api = MagicMock(spec=Inference)
|
||||
mock_api.embeddings = AsyncMock(return_value=EmbeddingsResponse(embeddings=sample_embeddings))
|
||||
return mock_api
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def mock_files_api():
|
||||
mock_api = MagicMock(spec=Files)
|
||||
|
@ -96,22 +88,6 @@ async def faiss_index(embedding_dimension):
|
|||
yield index
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
async def faiss_adapter(faiss_config, mock_inference_api, mock_files_api) -> FaissVectorIOAdapter:
|
||||
# Create the adapter
|
||||
adapter = FaissVectorIOAdapter(config=faiss_config, inference_api=mock_inference_api, files_api=mock_files_api)
|
||||
|
||||
# Create a mock KVStore
|
||||
mock_kvstore = MagicMock()
|
||||
mock_kvstore.values_in_range = AsyncMock(return_value=[])
|
||||
|
||||
# Patch the initialize method to avoid the kvstore_impl call
|
||||
with patch.object(FaissVectorIOAdapter, "initialize"):
|
||||
# Set the kvstore directly
|
||||
adapter.kvstore = mock_kvstore
|
||||
yield adapter
|
||||
|
||||
|
||||
async def test_faiss_query_vector_returns_infinity_when_query_and_embedding_are_identical(
|
||||
faiss_index, sample_chunks, sample_embeddings, embedding_dimension
|
||||
):
|
||||
|
|
|
@ -67,6 +67,9 @@ async def test_responses_store_pagination_basic():
|
|||
input_list = [create_test_response_input(f"Input for {response_id}", f"input-{response_id}")]
|
||||
await store.store_response_object(response, input_list)
|
||||
|
||||
# Wait for all queued writes to complete
|
||||
await store.flush()
|
||||
|
||||
# Test 1: First page with limit=2, descending order (default)
|
||||
result = await store.list_responses(limit=2, order=Order.desc)
|
||||
assert len(result.data) == 2
|
||||
|
@ -110,6 +113,9 @@ async def test_responses_store_pagination_ascending():
|
|||
input_list = [create_test_response_input(f"Input for {response_id}", f"input-{response_id}")]
|
||||
await store.store_response_object(response, input_list)
|
||||
|
||||
# Wait for all queued writes to complete
|
||||
await store.flush()
|
||||
|
||||
# Test ascending order pagination
|
||||
result = await store.list_responses(limit=1, order=Order.asc)
|
||||
assert len(result.data) == 1
|
||||
|
@ -145,6 +151,9 @@ async def test_responses_store_pagination_with_model_filter():
|
|||
input_list = [create_test_response_input(f"Input for {response_id}", f"input-{response_id}")]
|
||||
await store.store_response_object(response, input_list)
|
||||
|
||||
# Wait for all queued writes to complete
|
||||
await store.flush()
|
||||
|
||||
# Test pagination with model filter
|
||||
result = await store.list_responses(limit=1, model="model-a", order=Order.desc)
|
||||
assert len(result.data) == 1
|
||||
|
@ -192,6 +201,9 @@ async def test_responses_store_pagination_no_limit():
|
|||
input_list = [create_test_response_input(f"Input for {response_id}", f"input-{response_id}")]
|
||||
await store.store_response_object(response, input_list)
|
||||
|
||||
# Wait for all queued writes to complete
|
||||
await store.flush()
|
||||
|
||||
# Test without limit (should use default of 50)
|
||||
result = await store.list_responses(order=Order.desc)
|
||||
assert len(result.data) == 2
|
||||
|
@ -212,6 +224,9 @@ async def test_responses_store_get_response_object():
|
|||
input_list = [create_test_response_input("Test input content", "input-test-resp")]
|
||||
await store.store_response_object(response, input_list)
|
||||
|
||||
# Wait for all queued writes to complete
|
||||
await store.flush()
|
||||
|
||||
# Retrieve the response
|
||||
retrieved = await store.get_response_object("test-resp")
|
||||
assert retrieved.id == "test-resp"
|
||||
|
@ -242,6 +257,9 @@ async def test_responses_store_input_items_pagination():
|
|||
]
|
||||
await store.store_response_object(response, input_list)
|
||||
|
||||
# Wait for all queued writes to complete
|
||||
await store.flush()
|
||||
|
||||
# Verify all items are stored correctly with explicit IDs
|
||||
all_items = await store.list_response_input_items("test-resp", order=Order.desc)
|
||||
assert len(all_items.data) == 5
|
||||
|
@ -319,6 +337,9 @@ async def test_responses_store_input_items_before_pagination():
|
|||
]
|
||||
await store.store_response_object(response, input_list)
|
||||
|
||||
# Wait for all queued writes to complete
|
||||
await store.flush()
|
||||
|
||||
# Test before pagination with descending order
|
||||
# In desc order: [Fifth, Fourth, Third, Second, First]
|
||||
# before="before-3" should return [Fifth, Fourth]
|
||||
|
|