diff --git a/.github/actions/run-and-record-tests/action.yml b/.github/actions/run-and-record-tests/action.yml index a5aa31af4..3929df09c 100644 --- a/.github/actions/run-and-record-tests/action.yml +++ b/.github/actions/run-and-record-tests/action.yml @@ -82,11 +82,14 @@ runs: echo "No recording changes" fi - - name: Write inference logs to file + - name: Write docker logs to file if: ${{ always() }} shell: bash run: | sudo docker logs ollama > ollama-${{ inputs.inference-mode }}.log || true + distro_name=$(echo "${{ inputs.stack-config }}" | sed 's/^docker://' | sed 's/^server://') + stack_container_name="llama-stack-test-$distro_name" + sudo docker logs $stack_container_name > docker-${distro_name}-${{ inputs.inference-mode }}.log || true - name: Upload logs if: ${{ always() }} diff --git a/.github/workflows/integration-auth-tests.yml b/.github/workflows/integration-auth-tests.yml index ea3ff2b64..30a8063ea 100644 --- a/.github/workflows/integration-auth-tests.yml +++ b/.github/workflows/integration-auth-tests.yml @@ -73,6 +73,24 @@ jobs: image_name: kube apis: [] providers: {} + storage: + backends: + kv_default: + type: kv_sqlite + db_path: $run_dir/kvstore.db + sql_default: + type: sql_sqlite + db_path: $run_dir/sql_store.db + stores: + metadata: + namespace: registry + backend: kv_default + inference: + table_name: inference_store + backend: sql_default + conversations: + table_name: openai_conversations + backend: sql_default server: port: 8321 EOF diff --git a/.github/workflows/integration-vector-io-tests.yml b/.github/workflows/integration-vector-io-tests.yml index e9a758873..a6a86b15f 100644 --- a/.github/workflows/integration-vector-io-tests.yml +++ b/.github/workflows/integration-vector-io-tests.yml @@ -169,9 +169,7 @@ jobs: run: | uv run --no-sync \ pytest -sv --stack-config="files=inline::localfs,inference=inline::sentence-transformers,vector_io=${{ matrix.vector-io-provider }}" \ - tests/integration/vector_io \ - --embedding-model inline::sentence-transformers/nomic-ai/nomic-embed-text-v1.5 \ - --embedding-dimension 768 + tests/integration/vector_io - name: Check Storage and Memory Available After Tests if: ${{ always() }} diff --git a/benchmarking/k8s-benchmark/stack-configmap.yaml b/benchmarking/k8s-benchmark/stack-configmap.yaml index bb8a48d65..e1ca170f5 100644 --- a/benchmarking/k8s-benchmark/stack-configmap.yaml +++ b/benchmarking/k8s-benchmark/stack-configmap.yaml @@ -98,21 +98,30 @@ data: - provider_id: model-context-protocol provider_type: remote::model-context-protocol config: {} - metadata_store: - type: postgres - host: ${env.POSTGRES_HOST:=localhost} - port: ${env.POSTGRES_PORT:=5432} - db: ${env.POSTGRES_DB:=llamastack} - user: ${env.POSTGRES_USER:=llamastack} - password: ${env.POSTGRES_PASSWORD:=llamastack} - table_name: llamastack_kvstore - inference_store: - type: postgres - host: ${env.POSTGRES_HOST:=localhost} - port: ${env.POSTGRES_PORT:=5432} - db: ${env.POSTGRES_DB:=llamastack} - user: ${env.POSTGRES_USER:=llamastack} - password: ${env.POSTGRES_PASSWORD:=llamastack} + storage: + backends: + kv_default: + type: kv_postgres + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} + table_name: ${env.POSTGRES_TABLE_NAME:=llamastack_kvstore} + sql_default: + type: sql_postgres + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} + references: + metadata: + backend: kv_default + namespace: registry + inference: + backend: sql_default + table_name: inference_store models: - metadata: embedding_dimension: 768 @@ -137,5 +146,4 @@ data: port: 8323 kind: ConfigMap metadata: - creationTimestamp: null name: llama-stack-config diff --git a/benchmarking/k8s-benchmark/stack_run_config.yaml b/benchmarking/k8s-benchmark/stack_run_config.yaml index e2fbfd7a4..2ccaa21aa 100644 --- a/benchmarking/k8s-benchmark/stack_run_config.yaml +++ b/benchmarking/k8s-benchmark/stack_run_config.yaml @@ -95,21 +95,30 @@ providers: - provider_id: model-context-protocol provider_type: remote::model-context-protocol config: {} -metadata_store: - type: postgres - host: ${env.POSTGRES_HOST:=localhost} - port: ${env.POSTGRES_PORT:=5432} - db: ${env.POSTGRES_DB:=llamastack} - user: ${env.POSTGRES_USER:=llamastack} - password: ${env.POSTGRES_PASSWORD:=llamastack} - table_name: llamastack_kvstore -inference_store: - type: postgres - host: ${env.POSTGRES_HOST:=localhost} - port: ${env.POSTGRES_PORT:=5432} - db: ${env.POSTGRES_DB:=llamastack} - user: ${env.POSTGRES_USER:=llamastack} - password: ${env.POSTGRES_PASSWORD:=llamastack} +storage: + backends: + kv_default: + type: kv_postgres + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} + table_name: ${env.POSTGRES_TABLE_NAME:=llamastack_kvstore} + sql_default: + type: sql_postgres + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} + references: + metadata: + backend: kv_default + namespace: registry + inference: + backend: sql_default + table_name: inference_store models: - metadata: embedding_dimension: 768 diff --git a/client-sdks/stainless/README.md b/client-sdks/stainless/README.md new file mode 100644 index 000000000..5d391f14c --- /dev/null +++ b/client-sdks/stainless/README.md @@ -0,0 +1,8 @@ +These are the source-of-truth configuration files used to generate the Stainless client SDKs via Stainless. + +- `openapi.yml`: this is the OpenAPI specification for the Llama Stack API. +- `openapi.stainless.yml`: this is the Stainless _configuration_ which instructs Stainless how to generate the client SDKs. + +A small side note: notice the `.yml` suffixes since Stainless uses that suffix typically for its configuration files. + +These files go hand-in-hand. As of now, only the `openapi.yml` file is automatically generated using the `run_openapi_generator.sh` script. \ No newline at end of file diff --git a/client-sdks/stainless/openapi.stainless.yml b/client-sdks/stainless/openapi.stainless.yml new file mode 100644 index 000000000..0a5dfc044 --- /dev/null +++ b/client-sdks/stainless/openapi.stainless.yml @@ -0,0 +1,608 @@ +# yaml-language-server: $schema=https://app.stainlessapi.com/config-internal.schema.json + +organization: + # Name of your organization or company, used to determine the name of the client + # and headings. + name: llama-stack-client + docs: https://llama-stack.readthedocs.io/en/latest/ + contact: llamastack@meta.com +security: + - {} + - BearerAuth: [] +security_schemes: + BearerAuth: + type: http + scheme: bearer +# `targets` define the output targets and their customization options, such as +# whether to emit the Node SDK and what it's package name should be. +targets: + node: + package_name: llama-stack-client + production_repo: llamastack/llama-stack-client-typescript + publish: + npm: false + python: + package_name: llama_stack_client + production_repo: llamastack/llama-stack-client-python + options: + use_uv: true + publish: + pypi: true + project_name: llama_stack_client + kotlin: + reverse_domain: com.llama_stack_client.api + production_repo: null + publish: + maven: false + go: + package_name: llama-stack-client + production_repo: llamastack/llama-stack-client-go + options: + enable_v2: true + back_compat_use_shared_package: false + +# `client_settings` define settings for the API client, such as extra constructor +# arguments (used for authentication), retry behavior, idempotency, etc. +client_settings: + default_env_prefix: LLAMA_STACK_CLIENT + opts: + api_key: + type: string + read_env: LLAMA_STACK_CLIENT_API_KEY + auth: { security_scheme: BearerAuth } + nullable: true + +# `environments` are a map of the name of the environment (e.g. "sandbox", +# "production") to the corresponding url to use. +environments: + production: http://any-hosted-llama-stack.com + +# `pagination` defines [pagination schemes] which provides a template to match +# endpoints and generate next-page and auto-pagination helpers in the SDKs. +pagination: + - name: datasets_iterrows + type: offset + request: + dataset_id: + type: string + start_index: + type: integer + x-stainless-pagination-property: + purpose: offset_count_param + limit: + type: integer + response: + data: + type: array + items: + type: object + next_index: + type: integer + x-stainless-pagination-property: + purpose: offset_count_start_field + - name: openai_cursor_page + type: cursor + request: + limit: + type: integer + after: + type: string + x-stainless-pagination-property: + purpose: next_cursor_param + response: + data: + type: array + items: {} + has_more: + type: boolean + last_id: + type: string + x-stainless-pagination-property: + purpose: next_cursor_field +# `resources` define the structure and organziation for your API, such as how +# methods and models are grouped together and accessed. See the [configuration +# guide] for more information. +# +# [configuration guide]: +# https://app.stainlessapi.com/docs/guides/configure#resources +resources: + $shared: + models: + agent_config: AgentConfig + interleaved_content_item: InterleavedContentItem + interleaved_content: InterleavedContent + param_type: ParamType + safety_violation: SafetyViolation + sampling_params: SamplingParams + scoring_result: ScoringResult + message: Message + user_message: UserMessage + completion_message: CompletionMessage + tool_response_message: ToolResponseMessage + system_message: SystemMessage + tool_call: ToolCall + query_result: RAGQueryResult + document: RAGDocument + query_config: RAGQueryConfig + response_format: ResponseFormat + toolgroups: + models: + tool_group: ToolGroup + list_tool_groups_response: ListToolGroupsResponse + methods: + register: post /v1/toolgroups + get: get /v1/toolgroups/{toolgroup_id} + list: get /v1/toolgroups + unregister: delete /v1/toolgroups/{toolgroup_id} + tools: + methods: + get: get /v1/tools/{tool_name} + list: + endpoint: get /v1/tools + paginated: false + + tool_runtime: + models: + tool_def: ToolDef + tool_invocation_result: ToolInvocationResult + methods: + list_tools: + endpoint: get /v1/tool-runtime/list-tools + paginated: false + invoke_tool: post /v1/tool-runtime/invoke + subresources: + rag_tool: + methods: + insert: post /v1/tool-runtime/rag-tool/insert + query: post /v1/tool-runtime/rag-tool/query + + responses: + models: + response_object_stream: OpenAIResponseObjectStream + response_object: OpenAIResponseObject + methods: + create: + type: http + endpoint: post /v1/responses + streaming: + stream_event_model: responses.response_object_stream + param_discriminator: stream + retrieve: get /v1/responses/{response_id} + list: + type: http + endpoint: get /v1/responses + delete: + type: http + endpoint: delete /v1/responses/{response_id} + subresources: + input_items: + methods: + list: + type: http + endpoint: get /v1/responses/{response_id}/input_items + + conversations: + models: + conversation_object: Conversation + methods: + create: + type: http + endpoint: post /v1/conversations + retrieve: get /v1/conversations/{conversation_id} + update: + type: http + endpoint: post /v1/conversations/{conversation_id} + delete: + type: http + endpoint: delete /v1/conversations/{conversation_id} + subresources: + items: + methods: + get: + type: http + endpoint: get /v1/conversations/{conversation_id}/items/{item_id} + list: + type: http + endpoint: get /v1/conversations/{conversation_id}/items + create: + type: http + endpoint: post /v1/conversations/{conversation_id}/items + + datasets: + models: + list_datasets_response: ListDatasetsResponse + methods: + register: post /v1beta/datasets + retrieve: get /v1beta/datasets/{dataset_id} + list: + endpoint: get /v1beta/datasets + paginated: false + unregister: delete /v1beta/datasets/{dataset_id} + iterrows: get /v1beta/datasetio/iterrows/{dataset_id} + appendrows: post /v1beta/datasetio/append-rows/{dataset_id} + + inspect: + models: + healthInfo: HealthInfo + providerInfo: ProviderInfo + routeInfo: RouteInfo + versionInfo: VersionInfo + methods: + health: get /v1/health + version: get /v1/version + + embeddings: + models: + create_embeddings_response: OpenAIEmbeddingsResponse + methods: + create: post /v1/embeddings + + chat: + models: + chat_completion_chunk: OpenAIChatCompletionChunk + subresources: + completions: + methods: + create: + type: http + endpoint: post /v1/chat/completions + streaming: + stream_event_model: chat.chat_completion_chunk + param_discriminator: stream + list: + type: http + endpoint: get /v1/chat/completions + retrieve: + type: http + endpoint: get /v1/chat/completions/{completion_id} + completions: + methods: + create: + type: http + endpoint: post /v1/completions + streaming: + param_discriminator: stream + + vector_io: + models: + queryChunksResponse: QueryChunksResponse + methods: + insert: post /v1/vector-io/insert + query: post /v1/vector-io/query + + vector_stores: + models: + vector_store: VectorStoreObject + list_vector_stores_response: VectorStoreListResponse + vector_store_delete_response: VectorStoreDeleteResponse + vector_store_search_response: VectorStoreSearchResponsePage + methods: + create: post /v1/vector_stores + list: + endpoint: get /v1/vector_stores + retrieve: get /v1/vector_stores/{vector_store_id} + update: post /v1/vector_stores/{vector_store_id} + delete: delete /v1/vector_stores/{vector_store_id} + search: post /v1/vector_stores/{vector_store_id}/search + subresources: + files: + models: + vector_store_file: VectorStoreFileObject + methods: + list: get /v1/vector_stores/{vector_store_id}/files + retrieve: get /v1/vector_stores/{vector_store_id}/files/{file_id} + update: post /v1/vector_stores/{vector_store_id}/files/{file_id} + delete: delete /v1/vector_stores/{vector_store_id}/files/{file_id} + create: post /v1/vector_stores/{vector_store_id}/files + content: get /v1/vector_stores/{vector_store_id}/files/{file_id}/content + file_batches: + models: + vector_store_file_batches: VectorStoreFileBatchObject + list_vector_store_files_in_batch_response: VectorStoreFilesListInBatchResponse + methods: + create: post /v1/vector_stores/{vector_store_id}/file_batches + retrieve: get /v1/vector_stores/{vector_store_id}/file_batches/{batch_id} + list_files: get /v1/vector_stores/{vector_store_id}/file_batches/{batch_id}/files + cancel: post /v1/vector_stores/{vector_store_id}/file_batches/{batch_id}/cancel + + models: + models: + model: Model + list_models_response: ListModelsResponse + methods: + retrieve: get /v1/models/{model_id} + list: + endpoint: get /v1/models + paginated: false + register: post /v1/models + unregister: delete /v1/models/{model_id} + subresources: + openai: + methods: + list: + endpoint: get /v1/models + paginated: false + + providers: + models: + list_providers_response: ListProvidersResponse + methods: + list: + endpoint: get /v1/providers + paginated: false + retrieve: get /v1/providers/{provider_id} + + routes: + models: + list_routes_response: ListRoutesResponse + methods: + list: + endpoint: get /v1/inspect/routes + paginated: false + + + moderations: + models: + create_response: ModerationObject + methods: + create: post /v1/moderations + + + safety: + models: + run_shield_response: RunShieldResponse + methods: + run_shield: post /v1/safety/run-shield + + + shields: + models: + shield: Shield + list_shields_response: ListShieldsResponse + methods: + retrieve: get /v1/shields/{identifier} + list: + endpoint: get /v1/shields + paginated: false + register: post /v1/shields + delete: delete /v1/shields/{identifier} + + synthetic_data_generation: + models: + syntheticDataGenerationResponse: SyntheticDataGenerationResponse + methods: + generate: post /v1/synthetic-data-generation/generate + + telemetry: + models: + span_with_status: SpanWithStatus + trace: Trace + query_spans_response: QuerySpansResponse + event: Event + query_condition: QueryCondition + methods: + query_traces: + endpoint: post /v1alpha/telemetry/traces + skip_test_reason: 'unsupported query params in java / kotlin' + get_span_tree: post /v1alpha/telemetry/spans/{span_id}/tree + query_spans: + endpoint: post /v1alpha/telemetry/spans + skip_test_reason: 'unsupported query params in java / kotlin' + query_metrics: + endpoint: post /v1alpha/telemetry/metrics/{metric_name} + skip_test_reason: 'unsupported query params in java / kotlin' + # log_event: post /v1alpha/telemetry/events + save_spans_to_dataset: post /v1alpha/telemetry/spans/export + get_span: get /v1alpha/telemetry/traces/{trace_id}/spans/{span_id} + get_trace: get /v1alpha/telemetry/traces/{trace_id} + + scoring: + methods: + score: post /v1/scoring/score + score_batch: post /v1/scoring/score-batch + scoring_functions: + methods: + retrieve: get /v1/scoring-functions/{scoring_fn_id} + list: + endpoint: get /v1/scoring-functions + paginated: false + register: post /v1/scoring-functions + models: + scoring_fn: ScoringFn + scoring_fn_params: ScoringFnParams + list_scoring_functions_response: ListScoringFunctionsResponse + + benchmarks: + methods: + retrieve: get /v1alpha/eval/benchmarks/{benchmark_id} + list: + endpoint: get /v1alpha/eval/benchmarks + paginated: false + register: post /v1alpha/eval/benchmarks + models: + benchmark: Benchmark + list_benchmarks_response: ListBenchmarksResponse + + files: + methods: + create: post /v1/files + list: get /v1/files + retrieve: get /v1/files/{file_id} + delete: delete /v1/files/{file_id} + content: get /v1/files/{file_id}/content + models: + file: OpenAIFileObject + list_files_response: ListOpenAIFileResponse + delete_file_response: OpenAIFileDeleteResponse + + alpha: + subresources: + inference: + methods: + rerank: post /v1alpha/inference/rerank + + post_training: + models: + algorithm_config: AlgorithmConfig + post_training_job: PostTrainingJob + list_post_training_jobs_response: ListPostTrainingJobsResponse + methods: + preference_optimize: post /v1alpha/post-training/preference-optimize + supervised_fine_tune: post /v1alpha/post-training/supervised-fine-tune + subresources: + job: + methods: + artifacts: get /v1alpha/post-training/job/artifacts + cancel: post /v1alpha/post-training/job/cancel + status: get /v1alpha/post-training/job/status + list: + endpoint: get /v1alpha/post-training/jobs + paginated: false + + eval: + methods: + evaluate_rows: post /v1alpha/eval/benchmarks/{benchmark_id}/evaluations + run_eval: post /v1alpha/eval/benchmarks/{benchmark_id}/jobs + evaluate_rows_alpha: post /v1alpha/eval/benchmarks/{benchmark_id}/evaluations + run_eval_alpha: post /v1alpha/eval/benchmarks/{benchmark_id}/jobs + + subresources: + jobs: + methods: + cancel: delete /v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id} + status: get /v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id} + retrieve: get /v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result + models: + evaluate_response: EvaluateResponse + benchmark_config: BenchmarkConfig + job: Job + + agents: + methods: + create: post /v1alpha/agents + list: get /v1alpha/agents + retrieve: get /v1alpha/agents/{agent_id} + delete: delete /v1alpha/agents/{agent_id} + models: + inference_step: InferenceStep + tool_execution_step: ToolExecutionStep + tool_response: ToolResponse + shield_call_step: ShieldCallStep + memory_retrieval_step: MemoryRetrievalStep + subresources: + session: + models: + session: Session + methods: + list: get /v1alpha/agents/{agent_id}/sessions + create: post /v1alpha/agents/{agent_id}/session + delete: delete /v1alpha/agents/{agent_id}/session/{session_id} + retrieve: get /v1alpha/agents/{agent_id}/session/{session_id} + steps: + methods: + retrieve: get /v1alpha/agents/{agent_id}/session/{session_id}/turn/{turn_id}/step/{step_id} + turn: + models: + turn: Turn + turn_response_event: AgentTurnResponseEvent + agent_turn_response_stream_chunk: AgentTurnResponseStreamChunk + methods: + create: + type: http + endpoint: post /v1alpha/agents/{agent_id}/session/{session_id}/turn + streaming: + stream_event_model: alpha.agents.turn.agent_turn_response_stream_chunk + param_discriminator: stream + retrieve: get /v1alpha/agents/{agent_id}/session/{session_id}/turn/{turn_id} + resume: + type: http + endpoint: post /v1alpha/agents/{agent_id}/session/{session_id}/turn/{turn_id}/resume + streaming: + stream_event_model: alpha.agents.turn.agent_turn_response_stream_chunk + param_discriminator: stream + + +settings: + license: MIT + unwrap_response_fields: [ data ] + +openapi: + transformations: + - command: renameValue + reason: pydantic reserved name + args: + filter: + only: + - '$.components.schemas.InferenceStep.properties.model_response' + rename: + python: + property_name: 'inference_model_response' + + # - command: renameValue + # reason: pydantic reserved name + # args: + # filter: + # only: + # - '$.components.schemas.Model.properties.model_type' + # rename: + # python: + # property_name: 'type' + - command: mergeObject + reason: Better return_type using enum + args: + target: + - '$.components.schemas' + object: + ReturnType: + additionalProperties: false + properties: + type: + enum: + - string + - number + - boolean + - array + - object + - json + - union + - chat_completion_input + - completion_input + - agent_turn_input + required: + - type + type: object + - command: replaceProperties + reason: Replace return type properties with better model (see above) + args: + filter: + only: + - '$.components.schemas.ScoringFn.properties.return_type' + - '$.components.schemas.RegisterScoringFunctionRequest.properties.return_type' + value: + $ref: '#/components/schemas/ReturnType' + - command: oneOfToAnyOf + reason: Prism (mock server) doesn't like one of our requests as it technically matches multiple variants + - reason: For better names + command: extractToRefs + args: + ref: + target: '$.components.schemas.ToolCallDelta.properties.tool_call' + name: '#/components/schemas/ToolCallOrString' + +# `readme` is used to configure the code snippets that will be rendered in the +# README.md of various SDKs. In particular, you can change the `headline` +# snippet's endpoint and the arguments to call it with. +readme: + example_requests: + default: + type: request + endpoint: post /v1/chat/completions + params: &ref_0 {} + headline: + type: request + endpoint: post /v1/models + params: *ref_0 + pagination: + type: request + endpoint: post /v1/chat/completions + params: {} diff --git a/client-sdks/stainless/openapi.yml b/client-sdks/stainless/openapi.yml new file mode 100644 index 000000000..eff01931f --- /dev/null +++ b/client-sdks/stainless/openapi.yml @@ -0,0 +1,13653 @@ +openapi: 3.1.0 +info: + title: >- + Llama Stack Specification - Stable & Experimental APIs + version: v1 + description: >- + This is the specification of the Llama Stack that provides + a set of endpoints and their corresponding interfaces that are + tailored to + best leverage Llama Models. + + **🔗 COMBINED**: This specification includes both stable production-ready APIs + and experimental pre-release APIs. Use stable APIs for production deployments + and experimental APIs for testing new features. +servers: + - url: http://any-hosted-llama-stack.com +paths: + /v1/chat/completions: + get: + responses: + '200': + description: A ListOpenAIChatCompletionResponse. + content: + application/json: + schema: + $ref: '#/components/schemas/ListOpenAIChatCompletionResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Inference + summary: List chat completions. + description: List chat completions. + parameters: + - name: after + in: query + description: >- + The ID of the last chat completion to return. + required: false + schema: + type: string + - name: limit + in: query + description: >- + The maximum number of chat completions to return. + required: false + schema: + type: integer + - name: model + in: query + description: The model to filter by. + required: false + schema: + type: string + - name: order + in: query + description: >- + The order to sort the chat completions by: "asc" or "desc". Defaults to + "desc". + required: false + schema: + $ref: '#/components/schemas/Order' + deprecated: false + post: + responses: + '200': + description: An OpenAIChatCompletion. + content: + application/json: + schema: + oneOf: + - $ref: '#/components/schemas/OpenAIChatCompletion' + - $ref: '#/components/schemas/OpenAIChatCompletionChunk' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Inference + summary: Create chat completions. + description: >- + Create chat completions. + + Generate an OpenAI-compatible chat completion for the given messages using + the specified model. + parameters: [] + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/OpenAIChatCompletionRequestWithExtraBody' + required: true + deprecated: false + /v1/chat/completions/{completion_id}: + get: + responses: + '200': + description: A OpenAICompletionWithInputMessages. + content: + application/json: + schema: + $ref: '#/components/schemas/OpenAICompletionWithInputMessages' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Inference + summary: Get chat completion. + description: >- + Get chat completion. + + Describe a chat completion by its ID. + parameters: + - name: completion_id + in: path + description: ID of the chat completion. + required: true + schema: + type: string + deprecated: false + /v1/completions: + post: + responses: + '200': + description: An OpenAICompletion. + content: + application/json: + schema: + $ref: '#/components/schemas/OpenAICompletion' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Inference + summary: Create completion. + description: >- + Create completion. + + Generate an OpenAI-compatible completion for the given prompt using the specified + model. + parameters: [] + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/OpenAICompletionRequestWithExtraBody' + required: true + deprecated: false + /v1/conversations: + post: + responses: + '200': + description: The created conversation object. + content: + application/json: + schema: + $ref: '#/components/schemas/Conversation' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Conversations + summary: Create a conversation. + description: >- + Create a conversation. + + Create a conversation. + parameters: [] + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/CreateConversationRequest' + required: true + deprecated: false + /v1/conversations/{conversation_id}: + get: + responses: + '200': + description: The conversation object. + content: + application/json: + schema: + $ref: '#/components/schemas/Conversation' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Conversations + summary: Retrieve a conversation. + description: >- + Retrieve a conversation. + + Get a conversation with the given ID. + parameters: + - name: conversation_id + in: path + description: The conversation identifier. + required: true + schema: + type: string + deprecated: false + post: + responses: + '200': + description: The updated conversation object. + content: + application/json: + schema: + $ref: '#/components/schemas/Conversation' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Conversations + summary: Update a conversation. + description: >- + Update a conversation. + + Update a conversation's metadata with the given ID. + parameters: + - name: conversation_id + in: path + description: The conversation identifier. + required: true + schema: + type: string + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/UpdateConversationRequest' + required: true + deprecated: false + delete: + responses: + '200': + description: The deleted conversation resource. + content: + application/json: + schema: + $ref: '#/components/schemas/ConversationDeletedResource' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Conversations + summary: Delete a conversation. + description: >- + Delete a conversation. + + Delete a conversation with the given ID. + parameters: + - name: conversation_id + in: path + description: The conversation identifier. + required: true + schema: + type: string + deprecated: false + /v1/conversations/{conversation_id}/items: + get: + responses: + '200': + description: List of conversation items. + content: + application/json: + schema: + $ref: '#/components/schemas/ConversationItemList' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Conversations + summary: List items. + description: >- + List items. + + List items in the conversation. + parameters: + - name: conversation_id + in: path + description: The conversation identifier. + required: true + schema: + type: string + - name: after + in: query + description: >- + An item ID to list items after, used in pagination. + required: true + schema: + oneOf: + - type: string + - type: object + title: NotGiven + description: >- + A sentinel singleton class used to distinguish omitted keyword arguments + from those passed in with the value None (which may have different + behavior). + + For example: + + + ```py + + def get(timeout: Union[int, NotGiven, None] = NotGiven()) -> Response: + ... + + + + get(timeout=1) # 1s timeout + + get(timeout=None) # No timeout + + get() # Default timeout behavior, which may not be statically known + at the method definition. + + ``` + - name: include + in: query + description: >- + Specify additional output data to include in the response. + required: true + schema: + oneOf: + - type: array + items: + type: string + enum: + - code_interpreter_call.outputs + - computer_call_output.output.image_url + - file_search_call.results + - message.input_image.image_url + - message.output_text.logprobs + - reasoning.encrypted_content + - type: object + title: NotGiven + description: >- + A sentinel singleton class used to distinguish omitted keyword arguments + from those passed in with the value None (which may have different + behavior). + + For example: + + + ```py + + def get(timeout: Union[int, NotGiven, None] = NotGiven()) -> Response: + ... + + + + get(timeout=1) # 1s timeout + + get(timeout=None) # No timeout + + get() # Default timeout behavior, which may not be statically known + at the method definition. + + ``` + - name: limit + in: query + description: >- + A limit on the number of objects to be returned (1-100, default 20). + required: true + schema: + oneOf: + - type: integer + - type: object + title: NotGiven + description: >- + A sentinel singleton class used to distinguish omitted keyword arguments + from those passed in with the value None (which may have different + behavior). + + For example: + + + ```py + + def get(timeout: Union[int, NotGiven, None] = NotGiven()) -> Response: + ... + + + + get(timeout=1) # 1s timeout + + get(timeout=None) # No timeout + + get() # Default timeout behavior, which may not be statically known + at the method definition. + + ``` + - name: order + in: query + description: >- + The order to return items in (asc or desc, default desc). + required: true + schema: + oneOf: + - type: string + enum: + - asc + - desc + - type: object + title: NotGiven + description: >- + A sentinel singleton class used to distinguish omitted keyword arguments + from those passed in with the value None (which may have different + behavior). + + For example: + + + ```py + + def get(timeout: Union[int, NotGiven, None] = NotGiven()) -> Response: + ... + + + + get(timeout=1) # 1s timeout + + get(timeout=None) # No timeout + + get() # Default timeout behavior, which may not be statically known + at the method definition. + + ``` + deprecated: false + post: + responses: + '200': + description: List of created items. + content: + application/json: + schema: + $ref: '#/components/schemas/ConversationItemList' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Conversations + summary: Create items. + description: >- + Create items. + + Create items in the conversation. + parameters: + - name: conversation_id + in: path + description: The conversation identifier. + required: true + schema: + type: string + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/AddItemsRequest' + required: true + deprecated: false + /v1/conversations/{conversation_id}/items/{item_id}: + get: + responses: + '200': + description: The conversation item. + content: + application/json: + schema: + $ref: '#/components/schemas/ConversationItem' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Conversations + summary: Retrieve an item. + description: >- + Retrieve an item. + + Retrieve a conversation item. + parameters: + - name: conversation_id + in: path + description: The conversation identifier. + required: true + schema: + type: string + - name: item_id + in: path + description: The item identifier. + required: true + schema: + type: string + deprecated: false + delete: + responses: + '200': + description: The deleted item resource. + content: + application/json: + schema: + $ref: '#/components/schemas/ConversationItemDeletedResource' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Conversations + summary: Delete an item. + description: >- + Delete an item. + + Delete a conversation item. + parameters: + - name: conversation_id + in: path + description: The conversation identifier. + required: true + schema: + type: string + - name: item_id + in: path + description: The item identifier. + required: true + schema: + type: string + deprecated: false + /v1/embeddings: + post: + responses: + '200': + description: >- + An OpenAIEmbeddingsResponse containing the embeddings. + content: + application/json: + schema: + $ref: '#/components/schemas/OpenAIEmbeddingsResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Inference + summary: Create embeddings. + description: >- + Create embeddings. + + Generate OpenAI-compatible embeddings for the given input using the specified + model. + parameters: [] + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/OpenAIEmbeddingsRequestWithExtraBody' + required: true + deprecated: false + /v1/files: + get: + responses: + '200': + description: >- + An ListOpenAIFileResponse containing the list of files. + content: + application/json: + schema: + $ref: '#/components/schemas/ListOpenAIFileResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Files + summary: List files. + description: >- + List files. + + Returns a list of files that belong to the user's organization. + parameters: + - name: after + in: query + description: >- + A cursor for use in pagination. `after` is an object ID that defines your + place in the list. For instance, if you make a list request and receive + 100 objects, ending with obj_foo, your subsequent call can include after=obj_foo + in order to fetch the next page of the list. + required: false + schema: + type: string + - name: limit + in: query + description: >- + A limit on the number of objects to be returned. Limit can range between + 1 and 10,000, and the default is 10,000. + required: false + schema: + type: integer + - name: order + in: query + description: >- + Sort order by the `created_at` timestamp of the objects. `asc` for ascending + order and `desc` for descending order. + required: false + schema: + $ref: '#/components/schemas/Order' + - name: purpose + in: query + description: >- + Only return files with the given purpose. + required: false + schema: + $ref: '#/components/schemas/OpenAIFilePurpose' + deprecated: false + post: + responses: + '200': + description: >- + An OpenAIFileObject representing the uploaded file. + content: + application/json: + schema: + $ref: '#/components/schemas/OpenAIFileObject' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Files + summary: Upload file. + description: >- + Upload file. + + Upload a file that can be used across various endpoints. + + + The file upload should be a multipart form request with: + + - file: The File object (not file name) to be uploaded. + + - purpose: The intended purpose of the uploaded file. + + - expires_after: Optional form values describing expiration for the file. + parameters: [] + requestBody: + content: + multipart/form-data: + schema: + type: object + properties: + file: + type: string + format: binary + purpose: + $ref: '#/components/schemas/OpenAIFilePurpose' + expires_after: + $ref: '#/components/schemas/ExpiresAfter' + required: + - file + - purpose + required: true + deprecated: false + /v1/files/{file_id}: + get: + responses: + '200': + description: >- + An OpenAIFileObject containing file information. + content: + application/json: + schema: + $ref: '#/components/schemas/OpenAIFileObject' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Files + summary: Retrieve file. + description: >- + Retrieve file. + + Returns information about a specific file. + parameters: + - name: file_id + in: path + description: >- + The ID of the file to use for this request. + required: true + schema: + type: string + deprecated: false + delete: + responses: + '200': + description: >- + An OpenAIFileDeleteResponse indicating successful deletion. + content: + application/json: + schema: + $ref: '#/components/schemas/OpenAIFileDeleteResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Files + summary: Delete file. + description: Delete file. + parameters: + - name: file_id + in: path + description: >- + The ID of the file to use for this request. + required: true + schema: + type: string + deprecated: false + /v1/files/{file_id}/content: + get: + responses: + '200': + description: >- + The raw file content as a binary response. + content: + application/json: + schema: + $ref: '#/components/schemas/Response' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Files + summary: Retrieve file content. + description: >- + Retrieve file content. + + Returns the contents of the specified file. + parameters: + - name: file_id + in: path + description: >- + The ID of the file to use for this request. + required: true + schema: + type: string + deprecated: false + /v1/health: + get: + responses: + '200': + description: >- + Health information indicating if the service is operational. + content: + application/json: + schema: + $ref: '#/components/schemas/HealthInfo' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Inspect + summary: Get health status. + description: >- + Get health status. + + Get the current health status of the service. + parameters: [] + deprecated: false + /v1/inspect/routes: + get: + responses: + '200': + description: >- + Response containing information about all available routes. + content: + application/json: + schema: + $ref: '#/components/schemas/ListRoutesResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Inspect + summary: List routes. + description: >- + List routes. + + List all available API routes with their methods and implementing providers. + parameters: [] + deprecated: false + /v1/models: + get: + responses: + '200': + description: A ListModelsResponse. + content: + application/json: + schema: + $ref: '#/components/schemas/ListModelsResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Models + summary: List all models. + description: List all models. + parameters: [] + deprecated: false + post: + responses: + '200': + description: A Model. + content: + application/json: + schema: + $ref: '#/components/schemas/Model' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Models + summary: Register model. + description: >- + Register model. + + Register a model. + parameters: [] + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/RegisterModelRequest' + required: true + deprecated: false + /v1/models/{model_id}: + get: + responses: + '200': + description: A Model. + content: + application/json: + schema: + $ref: '#/components/schemas/Model' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Models + summary: Get model. + description: >- + Get model. + + Get a model by its identifier. + parameters: + - name: model_id + in: path + description: The identifier of the model to get. + required: true + schema: + type: string + deprecated: false + delete: + responses: + '200': + description: OK + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Models + summary: Unregister model. + description: >- + Unregister model. + + Unregister a model. + parameters: + - name: model_id + in: path + description: >- + The identifier of the model to unregister. + required: true + schema: + type: string + deprecated: false + /v1/moderations: + post: + responses: + '200': + description: A moderation object. + content: + application/json: + schema: + $ref: '#/components/schemas/ModerationObject' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Safety + summary: Create moderation. + description: >- + Create moderation. + + Classifies if text and/or image inputs are potentially harmful. + parameters: [] + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/RunModerationRequest' + required: true + deprecated: false + /v1/prompts: + get: + responses: + '200': + description: >- + A ListPromptsResponse containing all prompts. + content: + application/json: + schema: + $ref: '#/components/schemas/ListPromptsResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Prompts + summary: List all prompts. + description: List all prompts. + parameters: [] + deprecated: false + post: + responses: + '200': + description: The created Prompt resource. + content: + application/json: + schema: + $ref: '#/components/schemas/Prompt' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Prompts + summary: Create prompt. + description: >- + Create prompt. + + Create a new prompt. + parameters: [] + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/CreatePromptRequest' + required: true + deprecated: false + /v1/prompts/{prompt_id}: + get: + responses: + '200': + description: A Prompt resource. + content: + application/json: + schema: + $ref: '#/components/schemas/Prompt' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Prompts + summary: Get prompt. + description: >- + Get prompt. + + Get a prompt by its identifier and optional version. + parameters: + - name: prompt_id + in: path + description: The identifier of the prompt to get. + required: true + schema: + type: string + - name: version + in: query + description: >- + The version of the prompt to get (defaults to latest). + required: false + schema: + type: integer + deprecated: false + post: + responses: + '200': + description: >- + The updated Prompt resource with incremented version. + content: + application/json: + schema: + $ref: '#/components/schemas/Prompt' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Prompts + summary: Update prompt. + description: >- + Update prompt. + + Update an existing prompt (increments version). + parameters: + - name: prompt_id + in: path + description: The identifier of the prompt to update. + required: true + schema: + type: string + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/UpdatePromptRequest' + required: true + deprecated: false + delete: + responses: + '200': + description: OK + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Prompts + summary: Delete prompt. + description: >- + Delete prompt. + + Delete a prompt. + parameters: + - name: prompt_id + in: path + description: The identifier of the prompt to delete. + required: true + schema: + type: string + deprecated: false + /v1/prompts/{prompt_id}/set-default-version: + post: + responses: + '200': + description: >- + The prompt with the specified version now set as default. + content: + application/json: + schema: + $ref: '#/components/schemas/Prompt' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Prompts + summary: Set prompt version. + description: >- + Set prompt version. + + Set which version of a prompt should be the default in get_prompt (latest). + parameters: + - name: prompt_id + in: path + description: The identifier of the prompt. + required: true + schema: + type: string + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/SetDefaultVersionRequest' + required: true + deprecated: false + /v1/prompts/{prompt_id}/versions: + get: + responses: + '200': + description: >- + A ListPromptsResponse containing all versions of the prompt. + content: + application/json: + schema: + $ref: '#/components/schemas/ListPromptsResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Prompts + summary: List prompt versions. + description: >- + List prompt versions. + + List all versions of a specific prompt. + parameters: + - name: prompt_id + in: path + description: >- + The identifier of the prompt to list versions for. + required: true + schema: + type: string + deprecated: false + /v1/providers: + get: + responses: + '200': + description: >- + A ListProvidersResponse containing information about all providers. + content: + application/json: + schema: + $ref: '#/components/schemas/ListProvidersResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Providers + summary: List providers. + description: >- + List providers. + + List all available providers. + parameters: [] + deprecated: false + /v1/providers/{provider_id}: + get: + responses: + '200': + description: >- + A ProviderInfo object containing the provider's details. + content: + application/json: + schema: + $ref: '#/components/schemas/ProviderInfo' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Providers + summary: Get provider. + description: >- + Get provider. + + Get detailed information about a specific provider. + parameters: + - name: provider_id + in: path + description: The ID of the provider to inspect. + required: true + schema: + type: string + deprecated: false + /v1/responses: + get: + responses: + '200': + description: A ListOpenAIResponseObject. + content: + application/json: + schema: + $ref: '#/components/schemas/ListOpenAIResponseObject' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Agents + summary: List all responses. + description: List all responses. + parameters: + - name: after + in: query + description: The ID of the last response to return. + required: false + schema: + type: string + - name: limit + in: query + description: The number of responses to return. + required: false + schema: + type: integer + - name: model + in: query + description: The model to filter responses by. + required: false + schema: + type: string + - name: order + in: query + description: >- + The order to sort responses by when sorted by created_at ('asc' or 'desc'). + required: false + schema: + $ref: '#/components/schemas/Order' + deprecated: false + post: + responses: + '200': + description: An OpenAIResponseObject. + content: + application/json: + schema: + $ref: '#/components/schemas/OpenAIResponseObject' + text/event-stream: + schema: + $ref: '#/components/schemas/OpenAIResponseObjectStream' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Agents + summary: Create a model response. + description: Create a model response. + parameters: [] + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/CreateOpenaiResponseRequest' + required: true + deprecated: false + x-llama-stack-extra-body-params: + - name: guardrails + schema: + type: array + items: + oneOf: + - type: string + - $ref: '#/components/schemas/ResponseGuardrailSpec' + description: >- + List of guardrails to apply during response generation. Guardrails provide + safety and content moderation. + required: false + /v1/responses/{response_id}: + get: + responses: + '200': + description: An OpenAIResponseObject. + content: + application/json: + schema: + $ref: '#/components/schemas/OpenAIResponseObject' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Agents + summary: Get a model response. + description: Get a model response. + parameters: + - name: response_id + in: path + description: >- + The ID of the OpenAI response to retrieve. + required: true + schema: + type: string + deprecated: false + delete: + responses: + '200': + description: An OpenAIDeleteResponseObject + content: + application/json: + schema: + $ref: '#/components/schemas/OpenAIDeleteResponseObject' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Agents + summary: Delete a response. + description: Delete a response. + parameters: + - name: response_id + in: path + description: The ID of the OpenAI response to delete. + required: true + schema: + type: string + deprecated: false + /v1/responses/{response_id}/input_items: + get: + responses: + '200': + description: An ListOpenAIResponseInputItem. + content: + application/json: + schema: + $ref: '#/components/schemas/ListOpenAIResponseInputItem' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Agents + summary: List input items. + description: List input items. + parameters: + - name: response_id + in: path + description: >- + The ID of the response to retrieve input items for. + required: true + schema: + type: string + - name: after + in: query + description: >- + An item ID to list items after, used for pagination. + required: false + schema: + type: string + - name: before + in: query + description: >- + An item ID to list items before, used for pagination. + required: false + schema: + type: string + - name: include + in: query + description: >- + Additional fields to include in the response. + required: false + schema: + type: array + items: + type: string + - name: limit + in: query + description: >- + A limit on the number of objects to be returned. Limit can range between + 1 and 100, and the default is 20. + required: false + schema: + type: integer + - name: order + in: query + description: >- + The order to return the input items in. Default is desc. + required: false + schema: + $ref: '#/components/schemas/Order' + deprecated: false + /v1/safety/run-shield: + post: + responses: + '200': + description: A RunShieldResponse. + content: + application/json: + schema: + $ref: '#/components/schemas/RunShieldResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Safety + summary: Run shield. + description: >- + Run shield. + + Run a shield. + parameters: [] + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/RunShieldRequest' + required: true + deprecated: false + /v1/scoring-functions: + get: + responses: + '200': + description: A ListScoringFunctionsResponse. + content: + application/json: + schema: + $ref: '#/components/schemas/ListScoringFunctionsResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - ScoringFunctions + summary: List all scoring functions. + description: List all scoring functions. + parameters: [] + deprecated: false + post: + responses: + '200': + description: OK + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - ScoringFunctions + summary: Register a scoring function. + description: Register a scoring function. + parameters: [] + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/RegisterScoringFunctionRequest' + required: true + deprecated: false + /v1/scoring-functions/{scoring_fn_id}: + get: + responses: + '200': + description: A ScoringFn. + content: + application/json: + schema: + $ref: '#/components/schemas/ScoringFn' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - ScoringFunctions + summary: Get a scoring function by its ID. + description: Get a scoring function by its ID. + parameters: + - name: scoring_fn_id + in: path + description: The ID of the scoring function to get. + required: true + schema: + type: string + deprecated: false + delete: + responses: + '200': + description: OK + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - ScoringFunctions + summary: Unregister a scoring function. + description: Unregister a scoring function. + parameters: + - name: scoring_fn_id + in: path + description: >- + The ID of the scoring function to unregister. + required: true + schema: + type: string + deprecated: false + /v1/scoring/score: + post: + responses: + '200': + description: >- + A ScoreResponse object containing rows and aggregated results. + content: + application/json: + schema: + $ref: '#/components/schemas/ScoreResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Scoring + summary: Score a list of rows. + description: Score a list of rows. + parameters: [] + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/ScoreRequest' + required: true + deprecated: false + /v1/scoring/score-batch: + post: + responses: + '200': + description: A ScoreBatchResponse. + content: + application/json: + schema: + $ref: '#/components/schemas/ScoreBatchResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Scoring + summary: Score a batch of rows. + description: Score a batch of rows. + parameters: [] + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/ScoreBatchRequest' + required: true + deprecated: false + /v1/shields: + get: + responses: + '200': + description: A ListShieldsResponse. + content: + application/json: + schema: + $ref: '#/components/schemas/ListShieldsResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Shields + summary: List all shields. + description: List all shields. + parameters: [] + deprecated: false + post: + responses: + '200': + description: A Shield. + content: + application/json: + schema: + $ref: '#/components/schemas/Shield' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Shields + summary: Register a shield. + description: Register a shield. + parameters: [] + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/RegisterShieldRequest' + required: true + deprecated: false + /v1/shields/{identifier}: + get: + responses: + '200': + description: A Shield. + content: + application/json: + schema: + $ref: '#/components/schemas/Shield' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Shields + summary: Get a shield by its identifier. + description: Get a shield by its identifier. + parameters: + - name: identifier + in: path + description: The identifier of the shield to get. + required: true + schema: + type: string + deprecated: false + delete: + responses: + '200': + description: OK + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Shields + summary: Unregister a shield. + description: Unregister a shield. + parameters: + - name: identifier + in: path + description: >- + The identifier of the shield to unregister. + required: true + schema: + type: string + deprecated: false + /v1/synthetic-data-generation/generate: + post: + responses: + '200': + description: >- + Response containing filtered synthetic data samples and optional statistics + content: + application/json: + schema: + $ref: '#/components/schemas/SyntheticDataGenerationResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - SyntheticDataGeneration (Coming Soon) + summary: >- + Generate synthetic data based on input dialogs and apply filtering. + description: >- + Generate synthetic data based on input dialogs and apply filtering. + parameters: [] + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/SyntheticDataGenerateRequest' + required: true + deprecated: false + /v1/tool-runtime/invoke: + post: + responses: + '200': + description: A ToolInvocationResult. + content: + application/json: + schema: + $ref: '#/components/schemas/ToolInvocationResult' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - ToolRuntime + summary: Run a tool with the given arguments. + description: Run a tool with the given arguments. + parameters: [] + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/InvokeToolRequest' + required: true + deprecated: false + /v1/tool-runtime/list-tools: + get: + responses: + '200': + description: A ListToolDefsResponse. + content: + application/json: + schema: + $ref: '#/components/schemas/ListToolDefsResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - ToolRuntime + summary: List all tools in the runtime. + description: List all tools in the runtime. + parameters: + - name: tool_group_id + in: query + description: >- + The ID of the tool group to list tools for. + required: false + schema: + type: string + - name: mcp_endpoint + in: query + description: >- + The MCP endpoint to use for the tool group. + required: false + schema: + $ref: '#/components/schemas/URL' + deprecated: false + /v1/tool-runtime/rag-tool/insert: + post: + responses: + '200': + description: OK + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - ToolRuntime + summary: >- + Index documents so they can be used by the RAG system. + description: >- + Index documents so they can be used by the RAG system. + parameters: [] + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/InsertRequest' + required: true + deprecated: false + /v1/tool-runtime/rag-tool/query: + post: + responses: + '200': + description: >- + RAGQueryResult containing the retrieved content and metadata + content: + application/json: + schema: + $ref: '#/components/schemas/RAGQueryResult' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - ToolRuntime + summary: >- + Query the RAG system for context; typically invoked by the agent. + description: >- + Query the RAG system for context; typically invoked by the agent. + parameters: [] + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/QueryRequest' + required: true + deprecated: false + /v1/toolgroups: + get: + responses: + '200': + description: A ListToolGroupsResponse. + content: + application/json: + schema: + $ref: '#/components/schemas/ListToolGroupsResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - ToolGroups + summary: List tool groups with optional provider. + description: List tool groups with optional provider. + parameters: [] + deprecated: false + post: + responses: + '200': + description: OK + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - ToolGroups + summary: Register a tool group. + description: Register a tool group. + parameters: [] + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/RegisterToolGroupRequest' + required: true + deprecated: false + /v1/toolgroups/{toolgroup_id}: + get: + responses: + '200': + description: A ToolGroup. + content: + application/json: + schema: + $ref: '#/components/schemas/ToolGroup' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - ToolGroups + summary: Get a tool group by its ID. + description: Get a tool group by its ID. + parameters: + - name: toolgroup_id + in: path + description: The ID of the tool group to get. + required: true + schema: + type: string + deprecated: false + delete: + responses: + '200': + description: OK + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - ToolGroups + summary: Unregister a tool group. + description: Unregister a tool group. + parameters: + - name: toolgroup_id + in: path + description: The ID of the tool group to unregister. + required: true + schema: + type: string + deprecated: false + /v1/tools: + get: + responses: + '200': + description: A ListToolDefsResponse. + content: + application/json: + schema: + $ref: '#/components/schemas/ListToolDefsResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - ToolGroups + summary: List tools with optional tool group. + description: List tools with optional tool group. + parameters: + - name: toolgroup_id + in: query + description: >- + The ID of the tool group to list tools for. + required: false + schema: + type: string + deprecated: false + /v1/tools/{tool_name}: + get: + responses: + '200': + description: A ToolDef. + content: + application/json: + schema: + $ref: '#/components/schemas/ToolDef' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - ToolGroups + summary: Get a tool by its name. + description: Get a tool by its name. + parameters: + - name: tool_name + in: path + description: The name of the tool to get. + required: true + schema: + type: string + deprecated: false + /v1/vector-io/insert: + post: + responses: + '200': + description: OK + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - VectorIO + summary: Insert chunks into a vector database. + description: Insert chunks into a vector database. + parameters: [] + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/InsertChunksRequest' + required: true + deprecated: false + /v1/vector-io/query: + post: + responses: + '200': + description: A QueryChunksResponse. + content: + application/json: + schema: + $ref: '#/components/schemas/QueryChunksResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - VectorIO + summary: Query chunks from a vector database. + description: Query chunks from a vector database. + parameters: [] + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/QueryChunksRequest' + required: true + deprecated: false + /v1/vector_stores: + get: + responses: + '200': + description: >- + A VectorStoreListResponse containing the list of vector stores. + content: + application/json: + schema: + $ref: '#/components/schemas/VectorStoreListResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - VectorIO + summary: Returns a list of vector stores. + description: Returns a list of vector stores. + parameters: + - name: limit + in: query + description: >- + A limit on the number of objects to be returned. Limit can range between + 1 and 100, and the default is 20. + required: false + schema: + type: integer + - name: order + in: query + description: >- + Sort order by the `created_at` timestamp of the objects. `asc` for ascending + order and `desc` for descending order. + required: false + schema: + type: string + - name: after + in: query + description: >- + A cursor for use in pagination. `after` is an object ID that defines your + place in the list. + required: false + schema: + type: string + - name: before + in: query + description: >- + A cursor for use in pagination. `before` is an object ID that defines + your place in the list. + required: false + schema: + type: string + deprecated: false + post: + responses: + '200': + description: >- + A VectorStoreObject representing the created vector store. + content: + application/json: + schema: + $ref: '#/components/schemas/VectorStoreObject' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - VectorIO + summary: Creates a vector store. + description: >- + Creates a vector store. + + Generate an OpenAI-compatible vector store with the given parameters. + parameters: [] + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/OpenAICreateVectorStoreRequestWithExtraBody' + required: true + deprecated: false + /v1/vector_stores/{vector_store_id}: + get: + responses: + '200': + description: >- + A VectorStoreObject representing the vector store. + content: + application/json: + schema: + $ref: '#/components/schemas/VectorStoreObject' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - VectorIO + summary: Retrieves a vector store. + description: Retrieves a vector store. + parameters: + - name: vector_store_id + in: path + description: The ID of the vector store to retrieve. + required: true + schema: + type: string + deprecated: false + post: + responses: + '200': + description: >- + A VectorStoreObject representing the updated vector store. + content: + application/json: + schema: + $ref: '#/components/schemas/VectorStoreObject' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - VectorIO + summary: Updates a vector store. + description: Updates a vector store. + parameters: + - name: vector_store_id + in: path + description: The ID of the vector store to update. + required: true + schema: + type: string + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/OpenaiUpdateVectorStoreRequest' + required: true + deprecated: false + delete: + responses: + '200': + description: >- + A VectorStoreDeleteResponse indicating the deletion status. + content: + application/json: + schema: + $ref: '#/components/schemas/VectorStoreDeleteResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - VectorIO + summary: Delete a vector store. + description: Delete a vector store. + parameters: + - name: vector_store_id + in: path + description: The ID of the vector store to delete. + required: true + schema: + type: string + deprecated: false + /v1/vector_stores/{vector_store_id}/file_batches: + post: + responses: + '200': + description: >- + A VectorStoreFileBatchObject representing the created file batch. + content: + application/json: + schema: + $ref: '#/components/schemas/VectorStoreFileBatchObject' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - VectorIO + summary: Create a vector store file batch. + description: >- + Create a vector store file batch. + + Generate an OpenAI-compatible vector store file batch for the given vector + store. + parameters: + - name: vector_store_id + in: path + description: >- + The ID of the vector store to create the file batch for. + required: true + schema: + type: string + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/OpenAICreateVectorStoreFileBatchRequestWithExtraBody' + required: true + deprecated: false + /v1/vector_stores/{vector_store_id}/file_batches/{batch_id}: + get: + responses: + '200': + description: >- + A VectorStoreFileBatchObject representing the file batch. + content: + application/json: + schema: + $ref: '#/components/schemas/VectorStoreFileBatchObject' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - VectorIO + summary: Retrieve a vector store file batch. + description: Retrieve a vector store file batch. + parameters: + - name: batch_id + in: path + description: The ID of the file batch to retrieve. + required: true + schema: + type: string + - name: vector_store_id + in: path + description: >- + The ID of the vector store containing the file batch. + required: true + schema: + type: string + deprecated: false + /v1/vector_stores/{vector_store_id}/file_batches/{batch_id}/cancel: + post: + responses: + '200': + description: >- + A VectorStoreFileBatchObject representing the cancelled file batch. + content: + application/json: + schema: + $ref: '#/components/schemas/VectorStoreFileBatchObject' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - VectorIO + summary: Cancels a vector store file batch. + description: Cancels a vector store file batch. + parameters: + - name: batch_id + in: path + description: The ID of the file batch to cancel. + required: true + schema: + type: string + - name: vector_store_id + in: path + description: >- + The ID of the vector store containing the file batch. + required: true + schema: + type: string + deprecated: false + /v1/vector_stores/{vector_store_id}/file_batches/{batch_id}/files: + get: + responses: + '200': + description: >- + A VectorStoreFilesListInBatchResponse containing the list of files in + the batch. + content: + application/json: + schema: + $ref: '#/components/schemas/VectorStoreFilesListInBatchResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - VectorIO + summary: >- + Returns a list of vector store files in a batch. + description: >- + Returns a list of vector store files in a batch. + parameters: + - name: batch_id + in: path + description: >- + The ID of the file batch to list files from. + required: true + schema: + type: string + - name: vector_store_id + in: path + description: >- + The ID of the vector store containing the file batch. + required: true + schema: + type: string + - name: after + in: query + description: >- + A cursor for use in pagination. `after` is an object ID that defines your + place in the list. + required: false + schema: + type: string + - name: before + in: query + description: >- + A cursor for use in pagination. `before` is an object ID that defines + your place in the list. + required: false + schema: + type: string + - name: filter + in: query + description: >- + Filter by file status. One of in_progress, completed, failed, cancelled. + required: false + schema: + type: string + - name: limit + in: query + description: >- + A limit on the number of objects to be returned. Limit can range between + 1 and 100, and the default is 20. + required: false + schema: + type: integer + - name: order + in: query + description: >- + Sort order by the `created_at` timestamp of the objects. `asc` for ascending + order and `desc` for descending order. + required: false + schema: + type: string + deprecated: false + /v1/vector_stores/{vector_store_id}/files: + get: + responses: + '200': + description: >- + A VectorStoreListFilesResponse containing the list of files. + content: + application/json: + schema: + $ref: '#/components/schemas/VectorStoreListFilesResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - VectorIO + summary: List files in a vector store. + description: List files in a vector store. + parameters: + - name: vector_store_id + in: path + description: >- + The ID of the vector store to list files from. + required: true + schema: + type: string + - name: limit + in: query + description: >- + (Optional) A limit on the number of objects to be returned. Limit can + range between 1 and 100, and the default is 20. + required: false + schema: + type: integer + - name: order + in: query + description: >- + (Optional) Sort order by the `created_at` timestamp of the objects. `asc` + for ascending order and `desc` for descending order. + required: false + schema: + type: string + - name: after + in: query + description: >- + (Optional) A cursor for use in pagination. `after` is an object ID that + defines your place in the list. + required: false + schema: + type: string + - name: before + in: query + description: >- + (Optional) A cursor for use in pagination. `before` is an object ID that + defines your place in the list. + required: false + schema: + type: string + - name: filter + in: query + description: >- + (Optional) Filter by file status to only return files with the specified + status. + required: false + schema: + $ref: '#/components/schemas/VectorStoreFileStatus' + deprecated: false + post: + responses: + '200': + description: >- + A VectorStoreFileObject representing the attached file. + content: + application/json: + schema: + $ref: '#/components/schemas/VectorStoreFileObject' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - VectorIO + summary: Attach a file to a vector store. + description: Attach a file to a vector store. + parameters: + - name: vector_store_id + in: path + description: >- + The ID of the vector store to attach the file to. + required: true + schema: + type: string + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/OpenaiAttachFileToVectorStoreRequest' + required: true + deprecated: false + /v1/vector_stores/{vector_store_id}/files/{file_id}: + get: + responses: + '200': + description: >- + A VectorStoreFileObject representing the file. + content: + application/json: + schema: + $ref: '#/components/schemas/VectorStoreFileObject' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - VectorIO + summary: Retrieves a vector store file. + description: Retrieves a vector store file. + parameters: + - name: vector_store_id + in: path + description: >- + The ID of the vector store containing the file to retrieve. + required: true + schema: + type: string + - name: file_id + in: path + description: The ID of the file to retrieve. + required: true + schema: + type: string + deprecated: false + post: + responses: + '200': + description: >- + A VectorStoreFileObject representing the updated file. + content: + application/json: + schema: + $ref: '#/components/schemas/VectorStoreFileObject' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - VectorIO + summary: Updates a vector store file. + description: Updates a vector store file. + parameters: + - name: vector_store_id + in: path + description: >- + The ID of the vector store containing the file to update. + required: true + schema: + type: string + - name: file_id + in: path + description: The ID of the file to update. + required: true + schema: + type: string + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/OpenaiUpdateVectorStoreFileRequest' + required: true + deprecated: false + delete: + responses: + '200': + description: >- + A VectorStoreFileDeleteResponse indicating the deletion status. + content: + application/json: + schema: + $ref: '#/components/schemas/VectorStoreFileDeleteResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - VectorIO + summary: Delete a vector store file. + description: Delete a vector store file. + parameters: + - name: vector_store_id + in: path + description: >- + The ID of the vector store containing the file to delete. + required: true + schema: + type: string + - name: file_id + in: path + description: The ID of the file to delete. + required: true + schema: + type: string + deprecated: false + /v1/vector_stores/{vector_store_id}/files/{file_id}/content: + get: + responses: + '200': + description: >- + A list of InterleavedContent representing the file contents. + content: + application/json: + schema: + $ref: '#/components/schemas/VectorStoreFileContentsResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - VectorIO + summary: >- + Retrieves the contents of a vector store file. + description: >- + Retrieves the contents of a vector store file. + parameters: + - name: vector_store_id + in: path + description: >- + The ID of the vector store containing the file to retrieve. + required: true + schema: + type: string + - name: file_id + in: path + description: The ID of the file to retrieve. + required: true + schema: + type: string + deprecated: false + /v1/vector_stores/{vector_store_id}/search: + post: + responses: + '200': + description: >- + A VectorStoreSearchResponse containing the search results. + content: + application/json: + schema: + $ref: '#/components/schemas/VectorStoreSearchResponsePage' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - VectorIO + summary: Search for chunks in a vector store. + description: >- + Search for chunks in a vector store. + + Searches a vector store for relevant chunks based on a query and optional + file attribute filters. + parameters: + - name: vector_store_id + in: path + description: The ID of the vector store to search. + required: true + schema: + type: string + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/OpenaiSearchVectorStoreRequest' + required: true + deprecated: false + /v1/version: + get: + responses: + '200': + description: >- + Version information containing the service version number. + content: + application/json: + schema: + $ref: '#/components/schemas/VersionInfo' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Inspect + summary: Get version. + description: >- + Get version. + + Get the version of the service. + parameters: [] + deprecated: false + /v1beta/datasetio/append-rows/{dataset_id}: + post: + responses: + '200': + description: OK + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - DatasetIO + summary: Append rows to a dataset. + description: Append rows to a dataset. + parameters: + - name: dataset_id + in: path + description: >- + The ID of the dataset to append the rows to. + required: true + schema: + type: string + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/AppendRowsRequest' + required: true + deprecated: false + /v1beta/datasetio/iterrows/{dataset_id}: + get: + responses: + '200': + description: A PaginatedResponse. + content: + application/json: + schema: + $ref: '#/components/schemas/PaginatedResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - DatasetIO + summary: >- + Get a paginated list of rows from a dataset. + description: >- + Get a paginated list of rows from a dataset. + + Uses offset-based pagination where: + + - start_index: The starting index (0-based). If None, starts from beginning. + + - limit: Number of items to return. If None or -1, returns all items. + + + The response includes: + + - data: List of items for the current page. + + - has_more: Whether there are more items available after this set. + parameters: + - name: dataset_id + in: path + description: >- + The ID of the dataset to get the rows from. + required: true + schema: + type: string + - name: start_index + in: query + description: >- + Index into dataset for the first row to get. Get all rows if None. + required: false + schema: + type: integer + - name: limit + in: query + description: The number of rows to get. + required: false + schema: + type: integer + deprecated: false + /v1beta/datasets: + get: + responses: + '200': + description: A ListDatasetsResponse. + content: + application/json: + schema: + $ref: '#/components/schemas/ListDatasetsResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Datasets + summary: List all datasets. + description: List all datasets. + parameters: [] + deprecated: false + post: + responses: + '200': + description: A Dataset. + content: + application/json: + schema: + $ref: '#/components/schemas/Dataset' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Datasets + summary: Register a new dataset. + description: Register a new dataset. + parameters: [] + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/RegisterDatasetRequest' + required: true + deprecated: false + /v1beta/datasets/{dataset_id}: + get: + responses: + '200': + description: A Dataset. + content: + application/json: + schema: + $ref: '#/components/schemas/Dataset' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Datasets + summary: Get a dataset by its ID. + description: Get a dataset by its ID. + parameters: + - name: dataset_id + in: path + description: The ID of the dataset to get. + required: true + schema: + type: string + deprecated: false + delete: + responses: + '200': + description: OK + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Datasets + summary: Unregister a dataset by its ID. + description: Unregister a dataset by its ID. + parameters: + - name: dataset_id + in: path + description: The ID of the dataset to unregister. + required: true + schema: + type: string + deprecated: false + /v1alpha/agents: + get: + responses: + '200': + description: A PaginatedResponse. + content: + application/json: + schema: + $ref: '#/components/schemas/PaginatedResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Agents + summary: List all agents. + description: List all agents. + parameters: + - name: start_index + in: query + description: The index to start the pagination from. + required: false + schema: + type: integer + - name: limit + in: query + description: The number of agents to return. + required: false + schema: + type: integer + deprecated: false + post: + responses: + '200': + description: >- + An AgentCreateResponse with the agent ID. + content: + application/json: + schema: + $ref: '#/components/schemas/AgentCreateResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Agents + summary: >- + Create an agent with the given configuration. + description: >- + Create an agent with the given configuration. + parameters: [] + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/CreateAgentRequest' + required: true + deprecated: false + /v1alpha/agents/{agent_id}: + get: + responses: + '200': + description: An Agent of the agent. + content: + application/json: + schema: + $ref: '#/components/schemas/Agent' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Agents + summary: Describe an agent by its ID. + description: Describe an agent by its ID. + parameters: + - name: agent_id + in: path + description: ID of the agent. + required: true + schema: + type: string + deprecated: false + delete: + responses: + '200': + description: OK + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Agents + summary: >- + Delete an agent by its ID and its associated sessions and turns. + description: >- + Delete an agent by its ID and its associated sessions and turns. + parameters: + - name: agent_id + in: path + description: The ID of the agent to delete. + required: true + schema: + type: string + deprecated: false + /v1alpha/agents/{agent_id}/session: + post: + responses: + '200': + description: An AgentSessionCreateResponse. + content: + application/json: + schema: + $ref: '#/components/schemas/AgentSessionCreateResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Agents + summary: Create a new session for an agent. + description: Create a new session for an agent. + parameters: + - name: agent_id + in: path + description: >- + The ID of the agent to create the session for. + required: true + schema: + type: string + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/CreateAgentSessionRequest' + required: true + deprecated: false + /v1alpha/agents/{agent_id}/session/{session_id}: + get: + responses: + '200': + description: A Session. + content: + application/json: + schema: + $ref: '#/components/schemas/Session' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Agents + summary: Retrieve an agent session by its ID. + description: Retrieve an agent session by its ID. + parameters: + - name: session_id + in: path + description: The ID of the session to get. + required: true + schema: + type: string + - name: agent_id + in: path + description: >- + The ID of the agent to get the session for. + required: true + schema: + type: string + - name: turn_ids + in: query + description: >- + (Optional) List of turn IDs to filter the session by. + required: false + schema: + type: array + items: + type: string + deprecated: false + delete: + responses: + '200': + description: OK + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Agents + summary: >- + Delete an agent session by its ID and its associated turns. + description: >- + Delete an agent session by its ID and its associated turns. + parameters: + - name: session_id + in: path + description: The ID of the session to delete. + required: true + schema: + type: string + - name: agent_id + in: path + description: >- + The ID of the agent to delete the session for. + required: true + schema: + type: string + deprecated: false + /v1alpha/agents/{agent_id}/session/{session_id}/turn: + post: + responses: + '200': + description: >- + If stream=False, returns a Turn object. If stream=True, returns an SSE + event stream of AgentTurnResponseStreamChunk. + content: + application/json: + schema: + $ref: '#/components/schemas/Turn' + text/event-stream: + schema: + $ref: '#/components/schemas/AgentTurnResponseStreamChunk' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Agents + summary: Create a new turn for an agent. + description: Create a new turn for an agent. + parameters: + - name: agent_id + in: path + description: >- + The ID of the agent to create the turn for. + required: true + schema: + type: string + - name: session_id + in: path + description: >- + The ID of the session to create the turn for. + required: true + schema: + type: string + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/CreateAgentTurnRequest' + required: true + deprecated: false + /v1alpha/agents/{agent_id}/session/{session_id}/turn/{turn_id}: + get: + responses: + '200': + description: A Turn. + content: + application/json: + schema: + $ref: '#/components/schemas/Turn' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Agents + summary: Retrieve an agent turn by its ID. + description: Retrieve an agent turn by its ID. + parameters: + - name: agent_id + in: path + description: The ID of the agent to get the turn for. + required: true + schema: + type: string + - name: session_id + in: path + description: >- + The ID of the session to get the turn for. + required: true + schema: + type: string + - name: turn_id + in: path + description: The ID of the turn to get. + required: true + schema: + type: string + deprecated: false + /v1alpha/agents/{agent_id}/session/{session_id}/turn/{turn_id}/resume: + post: + responses: + '200': + description: >- + A Turn object if stream is False, otherwise an AsyncIterator of AgentTurnResponseStreamChunk + objects. + content: + application/json: + schema: + $ref: '#/components/schemas/Turn' + text/event-stream: + schema: + $ref: '#/components/schemas/AgentTurnResponseStreamChunk' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Agents + summary: >- + Resume an agent turn with executed tool call responses. + description: >- + Resume an agent turn with executed tool call responses. + + When a Turn has the status `awaiting_input` due to pending input from client + side tool calls, this endpoint can be used to submit the outputs from the + tool calls once they are ready. + parameters: + - name: agent_id + in: path + description: The ID of the agent to resume. + required: true + schema: + type: string + - name: session_id + in: path + description: The ID of the session to resume. + required: true + schema: + type: string + - name: turn_id + in: path + description: The ID of the turn to resume. + required: true + schema: + type: string + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/ResumeAgentTurnRequest' + required: true + deprecated: false + /v1alpha/agents/{agent_id}/session/{session_id}/turn/{turn_id}/step/{step_id}: + get: + responses: + '200': + description: An AgentStepResponse. + content: + application/json: + schema: + $ref: '#/components/schemas/AgentStepResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Agents + summary: Retrieve an agent step by its ID. + description: Retrieve an agent step by its ID. + parameters: + - name: agent_id + in: path + description: The ID of the agent to get the step for. + required: true + schema: + type: string + - name: session_id + in: path + description: >- + The ID of the session to get the step for. + required: true + schema: + type: string + - name: turn_id + in: path + description: The ID of the turn to get the step for. + required: true + schema: + type: string + - name: step_id + in: path + description: The ID of the step to get. + required: true + schema: + type: string + deprecated: false + /v1alpha/agents/{agent_id}/sessions: + get: + responses: + '200': + description: A PaginatedResponse. + content: + application/json: + schema: + $ref: '#/components/schemas/PaginatedResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Agents + summary: List all session(s) of a given agent. + description: List all session(s) of a given agent. + parameters: + - name: agent_id + in: path + description: >- + The ID of the agent to list sessions for. + required: true + schema: + type: string + - name: start_index + in: query + description: The index to start the pagination from. + required: false + schema: + type: integer + - name: limit + in: query + description: The number of sessions to return. + required: false + schema: + type: integer + deprecated: false + /v1alpha/eval/benchmarks: + get: + responses: + '200': + description: A ListBenchmarksResponse. + content: + application/json: + schema: + $ref: '#/components/schemas/ListBenchmarksResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Benchmarks + summary: List all benchmarks. + description: List all benchmarks. + parameters: [] + deprecated: false + post: + responses: + '200': + description: OK + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Benchmarks + summary: Register a benchmark. + description: Register a benchmark. + parameters: [] + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/RegisterBenchmarkRequest' + required: true + deprecated: false + /v1alpha/eval/benchmarks/{benchmark_id}: + get: + responses: + '200': + description: A Benchmark. + content: + application/json: + schema: + $ref: '#/components/schemas/Benchmark' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Benchmarks + summary: Get a benchmark by its ID. + description: Get a benchmark by its ID. + parameters: + - name: benchmark_id + in: path + description: The ID of the benchmark to get. + required: true + schema: + type: string + deprecated: false + delete: + responses: + '200': + description: OK + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Benchmarks + summary: Unregister a benchmark. + description: Unregister a benchmark. + parameters: + - name: benchmark_id + in: path + description: The ID of the benchmark to unregister. + required: true + schema: + type: string + deprecated: false + /v1alpha/eval/benchmarks/{benchmark_id}/evaluations: + post: + responses: + '200': + description: >- + EvaluateResponse object containing generations and scores. + content: + application/json: + schema: + $ref: '#/components/schemas/EvaluateResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Eval + summary: Evaluate a list of rows on a benchmark. + description: Evaluate a list of rows on a benchmark. + parameters: + - name: benchmark_id + in: path + description: >- + The ID of the benchmark to run the evaluation on. + required: true + schema: + type: string + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/EvaluateRowsRequest' + required: true + deprecated: false + /v1alpha/eval/benchmarks/{benchmark_id}/jobs: + post: + responses: + '200': + description: >- + The job that was created to run the evaluation. + content: + application/json: + schema: + $ref: '#/components/schemas/Job' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Eval + summary: Run an evaluation on a benchmark. + description: Run an evaluation on a benchmark. + parameters: + - name: benchmark_id + in: path + description: >- + The ID of the benchmark to run the evaluation on. + required: true + schema: + type: string + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/RunEvalRequest' + required: true + deprecated: false + /v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}: + get: + responses: + '200': + description: The status of the evaluation job. + content: + application/json: + schema: + $ref: '#/components/schemas/Job' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Eval + summary: Get the status of a job. + description: Get the status of a job. + parameters: + - name: benchmark_id + in: path + description: >- + The ID of the benchmark to run the evaluation on. + required: true + schema: + type: string + - name: job_id + in: path + description: The ID of the job to get the status of. + required: true + schema: + type: string + deprecated: false + delete: + responses: + '200': + description: OK + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Eval + summary: Cancel a job. + description: Cancel a job. + parameters: + - name: benchmark_id + in: path + description: >- + The ID of the benchmark to run the evaluation on. + required: true + schema: + type: string + - name: job_id + in: path + description: The ID of the job to cancel. + required: true + schema: + type: string + deprecated: false + /v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result: + get: + responses: + '200': + description: The result of the job. + content: + application/json: + schema: + $ref: '#/components/schemas/EvaluateResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Eval + summary: Get the result of a job. + description: Get the result of a job. + parameters: + - name: benchmark_id + in: path + description: >- + The ID of the benchmark to run the evaluation on. + required: true + schema: + type: string + - name: job_id + in: path + description: The ID of the job to get the result of. + required: true + schema: + type: string + deprecated: false + /v1alpha/inference/rerank: + post: + responses: + '200': + description: >- + RerankResponse with indices sorted by relevance score (descending). + content: + application/json: + schema: + $ref: '#/components/schemas/RerankResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Inference + summary: >- + Rerank a list of documents based on their relevance to a query. + description: >- + Rerank a list of documents based on their relevance to a query. + parameters: [] + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/RerankRequest' + required: true + deprecated: false + /v1alpha/post-training/job/artifacts: + get: + responses: + '200': + description: A PostTrainingJobArtifactsResponse. + content: + application/json: + schema: + $ref: '#/components/schemas/PostTrainingJobArtifactsResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - PostTraining (Coming Soon) + summary: Get the artifacts of a training job. + description: Get the artifacts of a training job. + parameters: + - name: job_uuid + in: query + description: >- + The UUID of the job to get the artifacts of. + required: true + schema: + type: string + deprecated: false + /v1alpha/post-training/job/cancel: + post: + responses: + '200': + description: OK + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - PostTraining (Coming Soon) + summary: Cancel a training job. + description: Cancel a training job. + parameters: [] + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/CancelTrainingJobRequest' + required: true + deprecated: false + /v1alpha/post-training/job/status: + get: + responses: + '200': + description: A PostTrainingJobStatusResponse. + content: + application/json: + schema: + $ref: '#/components/schemas/PostTrainingJobStatusResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - PostTraining (Coming Soon) + summary: Get the status of a training job. + description: Get the status of a training job. + parameters: + - name: job_uuid + in: query + description: >- + The UUID of the job to get the status of. + required: true + schema: + type: string + deprecated: false + /v1alpha/post-training/jobs: + get: + responses: + '200': + description: A ListPostTrainingJobsResponse. + content: + application/json: + schema: + $ref: '#/components/schemas/ListPostTrainingJobsResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - PostTraining (Coming Soon) + summary: Get all training jobs. + description: Get all training jobs. + parameters: [] + deprecated: false + /v1alpha/post-training/preference-optimize: + post: + responses: + '200': + description: A PostTrainingJob. + content: + application/json: + schema: + $ref: '#/components/schemas/PostTrainingJob' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - PostTraining (Coming Soon) + summary: Run preference optimization of a model. + description: Run preference optimization of a model. + parameters: [] + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/PreferenceOptimizeRequest' + required: true + deprecated: false + /v1alpha/post-training/supervised-fine-tune: + post: + responses: + '200': + description: A PostTrainingJob. + content: + application/json: + schema: + $ref: '#/components/schemas/PostTrainingJob' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - PostTraining (Coming Soon) + summary: Run supervised fine-tuning of a model. + description: Run supervised fine-tuning of a model. + parameters: [] + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/SupervisedFineTuneRequest' + required: true + deprecated: false +jsonSchemaDialect: >- + https://json-schema.org/draft/2020-12/schema +components: + schemas: + Error: + type: object + properties: + status: + type: integer + description: HTTP status code + title: + type: string + description: >- + Error title, a short summary of the error which is invariant for an error + type + detail: + type: string + description: >- + Error detail, a longer human-readable description of the error + instance: + type: string + description: >- + (Optional) A URL which can be used to retrieve more information about + the specific occurrence of the error + additionalProperties: false + required: + - status + - title + - detail + title: Error + description: >- + Error response from the API. Roughly follows RFC 7807. + Order: + type: string + enum: + - asc + - desc + title: Order + description: Sort order for paginated responses. + ListOpenAIChatCompletionResponse: + type: object + properties: + data: + type: array + items: + type: object + properties: + id: + type: string + description: The ID of the chat completion + choices: + type: array + items: + $ref: '#/components/schemas/OpenAIChoice' + description: List of choices + object: + type: string + const: chat.completion + default: chat.completion + description: >- + The object type, which will be "chat.completion" + created: + type: integer + description: >- + The Unix timestamp in seconds when the chat completion was created + model: + type: string + description: >- + The model that was used to generate the chat completion + usage: + $ref: '#/components/schemas/OpenAIChatCompletionUsage' + description: >- + Token usage information for the completion + input_messages: + type: array + items: + $ref: '#/components/schemas/OpenAIMessageParam' + additionalProperties: false + required: + - id + - choices + - object + - created + - model + - input_messages + title: OpenAICompletionWithInputMessages + description: >- + List of chat completion objects with their input messages + has_more: + type: boolean + description: >- + Whether there are more completions available beyond this list + first_id: + type: string + description: ID of the first completion in this list + last_id: + type: string + description: ID of the last completion in this list + object: + type: string + const: list + default: list + description: >- + Must be "list" to identify this as a list response + additionalProperties: false + required: + - data + - has_more + - first_id + - last_id + - object + title: ListOpenAIChatCompletionResponse + description: >- + Response from listing OpenAI-compatible chat completions. + OpenAIAssistantMessageParam: + type: object + properties: + role: + type: string + const: assistant + default: assistant + description: >- + Must be "assistant" to identify this as the model's response + content: + oneOf: + - type: string + - type: array + items: + $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + description: The content of the model's response + name: + type: string + description: >- + (Optional) The name of the assistant message participant. + tool_calls: + type: array + items: + $ref: '#/components/schemas/OpenAIChatCompletionToolCall' + description: >- + List of tool calls. Each tool call is an OpenAIChatCompletionToolCall + object. + additionalProperties: false + required: + - role + title: OpenAIAssistantMessageParam + description: >- + A message containing the model's (assistant) response in an OpenAI-compatible + chat completion request. + "OpenAIChatCompletionContentPartImageParam": + type: object + properties: + type: + type: string + const: image_url + default: image_url + description: >- + Must be "image_url" to identify this as image content + image_url: + $ref: '#/components/schemas/OpenAIImageURL' + description: >- + Image URL specification and processing details + additionalProperties: false + required: + - type + - image_url + title: >- + OpenAIChatCompletionContentPartImageParam + description: >- + Image content part for OpenAI-compatible chat completion messages. + OpenAIChatCompletionContentPartParam: + oneOf: + - $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + - $ref: '#/components/schemas/OpenAIChatCompletionContentPartImageParam' + - $ref: '#/components/schemas/OpenAIFile' + discriminator: + propertyName: type + mapping: + text: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + image_url: '#/components/schemas/OpenAIChatCompletionContentPartImageParam' + file: '#/components/schemas/OpenAIFile' + OpenAIChatCompletionContentPartTextParam: + type: object + properties: + type: + type: string + const: text + default: text + description: >- + Must be "text" to identify this as text content + text: + type: string + description: The text content of the message + additionalProperties: false + required: + - type + - text + title: OpenAIChatCompletionContentPartTextParam + description: >- + Text content part for OpenAI-compatible chat completion messages. + OpenAIChatCompletionToolCall: + type: object + properties: + index: + type: integer + description: >- + (Optional) Index of the tool call in the list + id: + type: string + description: >- + (Optional) Unique identifier for the tool call + type: + type: string + const: function + default: function + description: >- + Must be "function" to identify this as a function call + function: + $ref: '#/components/schemas/OpenAIChatCompletionToolCallFunction' + description: (Optional) Function call details + additionalProperties: false + required: + - type + title: OpenAIChatCompletionToolCall + description: >- + Tool call specification for OpenAI-compatible chat completion responses. + OpenAIChatCompletionToolCallFunction: + type: object + properties: + name: + type: string + description: (Optional) Name of the function to call + arguments: + type: string + description: >- + (Optional) Arguments to pass to the function as a JSON string + additionalProperties: false + title: OpenAIChatCompletionToolCallFunction + description: >- + Function call details for OpenAI-compatible tool calls. + OpenAIChatCompletionUsage: + type: object + properties: + prompt_tokens: + type: integer + description: Number of tokens in the prompt + completion_tokens: + type: integer + description: Number of tokens in the completion + total_tokens: + type: integer + description: Total tokens used (prompt + completion) + prompt_tokens_details: + type: object + properties: + cached_tokens: + type: integer + description: Number of tokens retrieved from cache + additionalProperties: false + title: >- + OpenAIChatCompletionUsagePromptTokensDetails + description: >- + Token details for prompt tokens in OpenAI chat completion usage. + completion_tokens_details: + type: object + properties: + reasoning_tokens: + type: integer + description: >- + Number of tokens used for reasoning (o1/o3 models) + additionalProperties: false + title: >- + OpenAIChatCompletionUsageCompletionTokensDetails + description: >- + Token details for output tokens in OpenAI chat completion usage. + additionalProperties: false + required: + - prompt_tokens + - completion_tokens + - total_tokens + title: OpenAIChatCompletionUsage + description: >- + Usage information for OpenAI chat completion. + OpenAIChoice: + type: object + properties: + message: + oneOf: + - $ref: '#/components/schemas/OpenAIUserMessageParam' + - $ref: '#/components/schemas/OpenAISystemMessageParam' + - $ref: '#/components/schemas/OpenAIAssistantMessageParam' + - $ref: '#/components/schemas/OpenAIToolMessageParam' + - $ref: '#/components/schemas/OpenAIDeveloperMessageParam' + discriminator: + propertyName: role + mapping: + user: '#/components/schemas/OpenAIUserMessageParam' + system: '#/components/schemas/OpenAISystemMessageParam' + assistant: '#/components/schemas/OpenAIAssistantMessageParam' + tool: '#/components/schemas/OpenAIToolMessageParam' + developer: '#/components/schemas/OpenAIDeveloperMessageParam' + description: The message from the model + finish_reason: + type: string + description: The reason the model stopped generating + index: + type: integer + description: The index of the choice + logprobs: + $ref: '#/components/schemas/OpenAIChoiceLogprobs' + description: >- + (Optional) The log probabilities for the tokens in the message + additionalProperties: false + required: + - message + - finish_reason + - index + title: OpenAIChoice + description: >- + A choice from an OpenAI-compatible chat completion response. + OpenAIChoiceLogprobs: + type: object + properties: + content: + type: array + items: + $ref: '#/components/schemas/OpenAITokenLogProb' + description: >- + (Optional) The log probabilities for the tokens in the message + refusal: + type: array + items: + $ref: '#/components/schemas/OpenAITokenLogProb' + description: >- + (Optional) The log probabilities for the tokens in the message + additionalProperties: false + title: OpenAIChoiceLogprobs + description: >- + The log probabilities for the tokens in the message from an OpenAI-compatible + chat completion response. + OpenAIDeveloperMessageParam: + type: object + properties: + role: + type: string + const: developer + default: developer + description: >- + Must be "developer" to identify this as a developer message + content: + oneOf: + - type: string + - type: array + items: + $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + description: The content of the developer message + name: + type: string + description: >- + (Optional) The name of the developer message participant. + additionalProperties: false + required: + - role + - content + title: OpenAIDeveloperMessageParam + description: >- + A message from the developer in an OpenAI-compatible chat completion request. + OpenAIFile: + type: object + properties: + type: + type: string + const: file + default: file + file: + $ref: '#/components/schemas/OpenAIFileFile' + additionalProperties: false + required: + - type + - file + title: OpenAIFile + OpenAIFileFile: + type: object + properties: + file_data: + type: string + file_id: + type: string + filename: + type: string + additionalProperties: false + title: OpenAIFileFile + OpenAIImageURL: + type: object + properties: + url: + type: string + description: >- + URL of the image to include in the message + detail: + type: string + description: >- + (Optional) Level of detail for image processing. Can be "low", "high", + or "auto" + additionalProperties: false + required: + - url + title: OpenAIImageURL + description: >- + Image URL specification for OpenAI-compatible chat completion messages. + OpenAIMessageParam: + oneOf: + - $ref: '#/components/schemas/OpenAIUserMessageParam' + - $ref: '#/components/schemas/OpenAISystemMessageParam' + - $ref: '#/components/schemas/OpenAIAssistantMessageParam' + - $ref: '#/components/schemas/OpenAIToolMessageParam' + - $ref: '#/components/schemas/OpenAIDeveloperMessageParam' + discriminator: + propertyName: role + mapping: + user: '#/components/schemas/OpenAIUserMessageParam' + system: '#/components/schemas/OpenAISystemMessageParam' + assistant: '#/components/schemas/OpenAIAssistantMessageParam' + tool: '#/components/schemas/OpenAIToolMessageParam' + developer: '#/components/schemas/OpenAIDeveloperMessageParam' + OpenAISystemMessageParam: + type: object + properties: + role: + type: string + const: system + default: system + description: >- + Must be "system" to identify this as a system message + content: + oneOf: + - type: string + - type: array + items: + $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + description: >- + The content of the "system prompt". If multiple system messages are provided, + they are concatenated. The underlying Llama Stack code may also add other + system messages (for example, for formatting tool definitions). + name: + type: string + description: >- + (Optional) The name of the system message participant. + additionalProperties: false + required: + - role + - content + title: OpenAISystemMessageParam + description: >- + A system message providing instructions or context to the model. + OpenAITokenLogProb: + type: object + properties: + token: + type: string + bytes: + type: array + items: + type: integer + logprob: + type: number + top_logprobs: + type: array + items: + $ref: '#/components/schemas/OpenAITopLogProb' + additionalProperties: false + required: + - token + - logprob + - top_logprobs + title: OpenAITokenLogProb + description: >- + The log probability for a token from an OpenAI-compatible chat completion + response. + OpenAIToolMessageParam: + type: object + properties: + role: + type: string + const: tool + default: tool + description: >- + Must be "tool" to identify this as a tool response + tool_call_id: + type: string + description: >- + Unique identifier for the tool call this response is for + content: + oneOf: + - type: string + - type: array + items: + $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + description: The response content from the tool + additionalProperties: false + required: + - role + - tool_call_id + - content + title: OpenAIToolMessageParam + description: >- + A message representing the result of a tool invocation in an OpenAI-compatible + chat completion request. + OpenAITopLogProb: + type: object + properties: + token: + type: string + bytes: + type: array + items: + type: integer + logprob: + type: number + additionalProperties: false + required: + - token + - logprob + title: OpenAITopLogProb + description: >- + The top log probability for a token from an OpenAI-compatible chat completion + response. + OpenAIUserMessageParam: + type: object + properties: + role: + type: string + const: user + default: user + description: >- + Must be "user" to identify this as a user message + content: + oneOf: + - type: string + - type: array + items: + $ref: '#/components/schemas/OpenAIChatCompletionContentPartParam' + description: >- + The content of the message, which can include text and other media + name: + type: string + description: >- + (Optional) The name of the user message participant. + additionalProperties: false + required: + - role + - content + title: OpenAIUserMessageParam + description: >- + A message from the user in an OpenAI-compatible chat completion request. + OpenAIJSONSchema: + type: object + properties: + name: + type: string + description: Name of the schema + description: + type: string + description: (Optional) Description of the schema + strict: + type: boolean + description: >- + (Optional) Whether to enforce strict adherence to the schema + schema: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: (Optional) The JSON schema definition + additionalProperties: false + required: + - name + title: OpenAIJSONSchema + description: >- + JSON schema specification for OpenAI-compatible structured response format. + OpenAIResponseFormatJSONObject: + type: object + properties: + type: + type: string + const: json_object + default: json_object + description: >- + Must be "json_object" to indicate generic JSON object response format + additionalProperties: false + required: + - type + title: OpenAIResponseFormatJSONObject + description: >- + JSON object response format for OpenAI-compatible chat completion requests. + OpenAIResponseFormatJSONSchema: + type: object + properties: + type: + type: string + const: json_schema + default: json_schema + description: >- + Must be "json_schema" to indicate structured JSON response format + json_schema: + $ref: '#/components/schemas/OpenAIJSONSchema' + description: >- + The JSON schema specification for the response + additionalProperties: false + required: + - type + - json_schema + title: OpenAIResponseFormatJSONSchema + description: >- + JSON schema response format for OpenAI-compatible chat completion requests. + OpenAIResponseFormatParam: + oneOf: + - $ref: '#/components/schemas/OpenAIResponseFormatText' + - $ref: '#/components/schemas/OpenAIResponseFormatJSONSchema' + - $ref: '#/components/schemas/OpenAIResponseFormatJSONObject' + discriminator: + propertyName: type + mapping: + text: '#/components/schemas/OpenAIResponseFormatText' + json_schema: '#/components/schemas/OpenAIResponseFormatJSONSchema' + json_object: '#/components/schemas/OpenAIResponseFormatJSONObject' + OpenAIResponseFormatText: + type: object + properties: + type: + type: string + const: text + default: text + description: >- + Must be "text" to indicate plain text response format + additionalProperties: false + required: + - type + title: OpenAIResponseFormatText + description: >- + Text response format for OpenAI-compatible chat completion requests. + OpenAIChatCompletionRequestWithExtraBody: + type: object + properties: + model: + type: string + description: >- + The identifier of the model to use. The model must be registered with + Llama Stack and available via the /models endpoint. + messages: + type: array + items: + $ref: '#/components/schemas/OpenAIMessageParam' + description: List of messages in the conversation. + frequency_penalty: + type: number + description: >- + (Optional) The penalty for repeated tokens. + function_call: + oneOf: + - type: string + - type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: (Optional) The function call to use. + functions: + type: array + items: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: (Optional) List of functions to use. + logit_bias: + type: object + additionalProperties: + type: number + description: (Optional) The logit bias to use. + logprobs: + type: boolean + description: (Optional) The log probabilities to use. + max_completion_tokens: + type: integer + description: >- + (Optional) The maximum number of tokens to generate. + max_tokens: + type: integer + description: >- + (Optional) The maximum number of tokens to generate. + n: + type: integer + description: >- + (Optional) The number of completions to generate. + parallel_tool_calls: + type: boolean + description: >- + (Optional) Whether to parallelize tool calls. + presence_penalty: + type: number + description: >- + (Optional) The penalty for repeated tokens. + response_format: + $ref: '#/components/schemas/OpenAIResponseFormatParam' + description: (Optional) The response format to use. + seed: + type: integer + description: (Optional) The seed to use. + stop: + oneOf: + - type: string + - type: array + items: + type: string + description: (Optional) The stop tokens to use. + stream: + type: boolean + description: >- + (Optional) Whether to stream the response. + stream_options: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: (Optional) The stream options to use. + temperature: + type: number + description: (Optional) The temperature to use. + tool_choice: + oneOf: + - type: string + - type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: (Optional) The tool choice to use. + tools: + type: array + items: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: (Optional) The tools to use. + top_logprobs: + type: integer + description: >- + (Optional) The top log probabilities to use. + top_p: + type: number + description: (Optional) The top p to use. + user: + type: string + description: (Optional) The user to use. + additionalProperties: false + required: + - model + - messages + title: OpenAIChatCompletionRequestWithExtraBody + description: >- + Request parameters for OpenAI-compatible chat completion endpoint. + OpenAIChatCompletion: + type: object + properties: + id: + type: string + description: The ID of the chat completion + choices: + type: array + items: + $ref: '#/components/schemas/OpenAIChoice' + description: List of choices + object: + type: string + const: chat.completion + default: chat.completion + description: >- + The object type, which will be "chat.completion" + created: + type: integer + description: >- + The Unix timestamp in seconds when the chat completion was created + model: + type: string + description: >- + The model that was used to generate the chat completion + usage: + $ref: '#/components/schemas/OpenAIChatCompletionUsage' + description: >- + Token usage information for the completion + additionalProperties: false + required: + - id + - choices + - object + - created + - model + title: OpenAIChatCompletion + description: >- + Response from an OpenAI-compatible chat completion request. + OpenAIChatCompletionChunk: + type: object + properties: + id: + type: string + description: The ID of the chat completion + choices: + type: array + items: + $ref: '#/components/schemas/OpenAIChunkChoice' + description: List of choices + object: + type: string + const: chat.completion.chunk + default: chat.completion.chunk + description: >- + The object type, which will be "chat.completion.chunk" + created: + type: integer + description: >- + The Unix timestamp in seconds when the chat completion was created + model: + type: string + description: >- + The model that was used to generate the chat completion + usage: + $ref: '#/components/schemas/OpenAIChatCompletionUsage' + description: >- + Token usage information (typically included in final chunk with stream_options) + additionalProperties: false + required: + - id + - choices + - object + - created + - model + title: OpenAIChatCompletionChunk + description: >- + Chunk from a streaming response to an OpenAI-compatible chat completion request. + OpenAIChoiceDelta: + type: object + properties: + content: + type: string + description: (Optional) The content of the delta + refusal: + type: string + description: (Optional) The refusal of the delta + role: + type: string + description: (Optional) The role of the delta + tool_calls: + type: array + items: + $ref: '#/components/schemas/OpenAIChatCompletionToolCall' + description: (Optional) The tool calls of the delta + reasoning_content: + type: string + description: >- + (Optional) The reasoning content from the model (non-standard, for o1/o3 + models) + additionalProperties: false + title: OpenAIChoiceDelta + description: >- + A delta from an OpenAI-compatible chat completion streaming response. + OpenAIChunkChoice: + type: object + properties: + delta: + $ref: '#/components/schemas/OpenAIChoiceDelta' + description: The delta from the chunk + finish_reason: + type: string + description: The reason the model stopped generating + index: + type: integer + description: The index of the choice + logprobs: + $ref: '#/components/schemas/OpenAIChoiceLogprobs' + description: >- + (Optional) The log probabilities for the tokens in the message + additionalProperties: false + required: + - delta + - finish_reason + - index + title: OpenAIChunkChoice + description: >- + A chunk choice from an OpenAI-compatible chat completion streaming response. + OpenAICompletionWithInputMessages: + type: object + properties: + id: + type: string + description: The ID of the chat completion + choices: + type: array + items: + $ref: '#/components/schemas/OpenAIChoice' + description: List of choices + object: + type: string + const: chat.completion + default: chat.completion + description: >- + The object type, which will be "chat.completion" + created: + type: integer + description: >- + The Unix timestamp in seconds when the chat completion was created + model: + type: string + description: >- + The model that was used to generate the chat completion + usage: + $ref: '#/components/schemas/OpenAIChatCompletionUsage' + description: >- + Token usage information for the completion + input_messages: + type: array + items: + $ref: '#/components/schemas/OpenAIMessageParam' + additionalProperties: false + required: + - id + - choices + - object + - created + - model + - input_messages + title: OpenAICompletionWithInputMessages + OpenAICompletionRequestWithExtraBody: + type: object + properties: + model: + type: string + description: >- + The identifier of the model to use. The model must be registered with + Llama Stack and available via the /models endpoint. + prompt: + oneOf: + - type: string + - type: array + items: + type: string + - type: array + items: + type: integer + - type: array + items: + type: array + items: + type: integer + description: The prompt to generate a completion for. + best_of: + type: integer + description: >- + (Optional) The number of completions to generate. + echo: + type: boolean + description: (Optional) Whether to echo the prompt. + frequency_penalty: + type: number + description: >- + (Optional) The penalty for repeated tokens. + logit_bias: + type: object + additionalProperties: + type: number + description: (Optional) The logit bias to use. + logprobs: + type: boolean + description: (Optional) The log probabilities to use. + max_tokens: + type: integer + description: >- + (Optional) The maximum number of tokens to generate. + n: + type: integer + description: >- + (Optional) The number of completions to generate. + presence_penalty: + type: number + description: >- + (Optional) The penalty for repeated tokens. + seed: + type: integer + description: (Optional) The seed to use. + stop: + oneOf: + - type: string + - type: array + items: + type: string + description: (Optional) The stop tokens to use. + stream: + type: boolean + description: >- + (Optional) Whether to stream the response. + stream_options: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: (Optional) The stream options to use. + temperature: + type: number + description: (Optional) The temperature to use. + top_p: + type: number + description: (Optional) The top p to use. + user: + type: string + description: (Optional) The user to use. + suffix: + type: string + description: >- + (Optional) The suffix that should be appended to the completion. + additionalProperties: false + required: + - model + - prompt + title: OpenAICompletionRequestWithExtraBody + description: >- + Request parameters for OpenAI-compatible completion endpoint. + OpenAICompletion: + type: object + properties: + id: + type: string + choices: + type: array + items: + $ref: '#/components/schemas/OpenAICompletionChoice' + created: + type: integer + model: + type: string + object: + type: string + const: text_completion + default: text_completion + additionalProperties: false + required: + - id + - choices + - created + - model + - object + title: OpenAICompletion + description: >- + Response from an OpenAI-compatible completion request. + OpenAICompletionChoice: + type: object + properties: + finish_reason: + type: string + text: + type: string + index: + type: integer + logprobs: + $ref: '#/components/schemas/OpenAIChoiceLogprobs' + additionalProperties: false + required: + - finish_reason + - text + - index + title: OpenAICompletionChoice + description: >- + A choice from an OpenAI-compatible completion response. + ConversationItem: + oneOf: + - $ref: '#/components/schemas/OpenAIResponseMessage' + - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + - $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalResponse' + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + discriminator: + propertyName: type + mapping: + message: '#/components/schemas/OpenAIResponseMessage' + web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + function_call_output: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' + mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + mcp_approval_response: '#/components/schemas/OpenAIResponseMCPApprovalResponse' + mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + OpenAIResponseAnnotationCitation: + type: object + properties: + type: + type: string + const: url_citation + default: url_citation + description: >- + Annotation type identifier, always "url_citation" + end_index: + type: integer + description: >- + End position of the citation span in the content + start_index: + type: integer + description: >- + Start position of the citation span in the content + title: + type: string + description: Title of the referenced web resource + url: + type: string + description: URL of the referenced web resource + additionalProperties: false + required: + - type + - end_index + - start_index + - title + - url + title: OpenAIResponseAnnotationCitation + description: >- + URL citation annotation for referencing external web resources. + "OpenAIResponseAnnotationContainerFileCitation": + type: object + properties: + type: + type: string + const: container_file_citation + default: container_file_citation + container_id: + type: string + end_index: + type: integer + file_id: + type: string + filename: + type: string + start_index: + type: integer + additionalProperties: false + required: + - type + - container_id + - end_index + - file_id + - filename + - start_index + title: >- + OpenAIResponseAnnotationContainerFileCitation + OpenAIResponseAnnotationFileCitation: + type: object + properties: + type: + type: string + const: file_citation + default: file_citation + description: >- + Annotation type identifier, always "file_citation" + file_id: + type: string + description: Unique identifier of the referenced file + filename: + type: string + description: Name of the referenced file + index: + type: integer + description: >- + Position index of the citation within the content + additionalProperties: false + required: + - type + - file_id + - filename + - index + title: OpenAIResponseAnnotationFileCitation + description: >- + File citation annotation for referencing specific files in response content. + OpenAIResponseAnnotationFilePath: + type: object + properties: + type: + type: string + const: file_path + default: file_path + file_id: + type: string + index: + type: integer + additionalProperties: false + required: + - type + - file_id + - index + title: OpenAIResponseAnnotationFilePath + OpenAIResponseAnnotations: + oneOf: + - $ref: '#/components/schemas/OpenAIResponseAnnotationFileCitation' + - $ref: '#/components/schemas/OpenAIResponseAnnotationCitation' + - $ref: '#/components/schemas/OpenAIResponseAnnotationContainerFileCitation' + - $ref: '#/components/schemas/OpenAIResponseAnnotationFilePath' + discriminator: + propertyName: type + mapping: + file_citation: '#/components/schemas/OpenAIResponseAnnotationFileCitation' + url_citation: '#/components/schemas/OpenAIResponseAnnotationCitation' + container_file_citation: '#/components/schemas/OpenAIResponseAnnotationContainerFileCitation' + file_path: '#/components/schemas/OpenAIResponseAnnotationFilePath' + OpenAIResponseContentPartRefusal: + type: object + properties: + type: + type: string + const: refusal + default: refusal + description: >- + Content part type identifier, always "refusal" + refusal: + type: string + description: Refusal text supplied by the model + additionalProperties: false + required: + - type + - refusal + title: OpenAIResponseContentPartRefusal + description: >- + Refusal content within a streamed response part. + "OpenAIResponseInputFunctionToolCallOutput": + type: object + properties: + call_id: + type: string + output: + type: string + type: + type: string + const: function_call_output + default: function_call_output + id: + type: string + status: + type: string + additionalProperties: false + required: + - call_id + - output + - type + title: >- + OpenAIResponseInputFunctionToolCallOutput + description: >- + This represents the output of a function call that gets passed back to the + model. + OpenAIResponseInputMessageContent: + oneOf: + - $ref: '#/components/schemas/OpenAIResponseInputMessageContentText' + - $ref: '#/components/schemas/OpenAIResponseInputMessageContentImage' + discriminator: + propertyName: type + mapping: + input_text: '#/components/schemas/OpenAIResponseInputMessageContentText' + input_image: '#/components/schemas/OpenAIResponseInputMessageContentImage' + OpenAIResponseInputMessageContentImage: + type: object + properties: + detail: + oneOf: + - type: string + const: low + - type: string + const: high + - type: string + const: auto + default: auto + description: >- + Level of detail for image processing, can be "low", "high", or "auto" + type: + type: string + const: input_image + default: input_image + description: >- + Content type identifier, always "input_image" + image_url: + type: string + description: (Optional) URL of the image content + additionalProperties: false + required: + - detail + - type + title: OpenAIResponseInputMessageContentImage + description: >- + Image content for input messages in OpenAI response format. + OpenAIResponseInputMessageContentText: + type: object + properties: + text: + type: string + description: The text content of the input message + type: + type: string + const: input_text + default: input_text + description: >- + Content type identifier, always "input_text" + additionalProperties: false + required: + - text + - type + title: OpenAIResponseInputMessageContentText + description: >- + Text content for input messages in OpenAI response format. + OpenAIResponseMCPApprovalRequest: + type: object + properties: + arguments: + type: string + id: + type: string + name: + type: string + server_label: + type: string + type: + type: string + const: mcp_approval_request + default: mcp_approval_request + additionalProperties: false + required: + - arguments + - id + - name + - server_label + - type + title: OpenAIResponseMCPApprovalRequest + description: >- + A request for human approval of a tool invocation. + OpenAIResponseMCPApprovalResponse: + type: object + properties: + approval_request_id: + type: string + approve: + type: boolean + type: + type: string + const: mcp_approval_response + default: mcp_approval_response + id: + type: string + reason: + type: string + additionalProperties: false + required: + - approval_request_id + - approve + - type + title: OpenAIResponseMCPApprovalResponse + description: A response to an MCP approval request. + OpenAIResponseMessage: + type: object + properties: + content: + oneOf: + - type: string + - type: array + items: + $ref: '#/components/schemas/OpenAIResponseInputMessageContent' + - type: array + items: + $ref: '#/components/schemas/OpenAIResponseOutputMessageContent' + role: + oneOf: + - type: string + const: system + - type: string + const: developer + - type: string + const: user + - type: string + const: assistant + type: + type: string + const: message + default: message + id: + type: string + status: + type: string + additionalProperties: false + required: + - content + - role + - type + title: OpenAIResponseMessage + description: >- + Corresponds to the various Message types in the Responses API. They are all + under one type because the Responses API gives them all the same "type" value, + and there is no way to tell them apart in certain scenarios. + OpenAIResponseOutputMessageContent: + oneOf: + - $ref: '#/components/schemas/OpenAIResponseOutputMessageContentOutputText' + - $ref: '#/components/schemas/OpenAIResponseContentPartRefusal' + discriminator: + propertyName: type + mapping: + output_text: '#/components/schemas/OpenAIResponseOutputMessageContentOutputText' + refusal: '#/components/schemas/OpenAIResponseContentPartRefusal' + "OpenAIResponseOutputMessageContentOutputText": + type: object + properties: + text: + type: string + type: + type: string + const: output_text + default: output_text + annotations: + type: array + items: + $ref: '#/components/schemas/OpenAIResponseAnnotations' + additionalProperties: false + required: + - text + - type + - annotations + title: >- + OpenAIResponseOutputMessageContentOutputText + "OpenAIResponseOutputMessageFileSearchToolCall": + type: object + properties: + id: + type: string + description: Unique identifier for this tool call + queries: + type: array + items: + type: string + description: List of search queries executed + status: + type: string + description: >- + Current status of the file search operation + type: + type: string + const: file_search_call + default: file_search_call + description: >- + Tool call type identifier, always "file_search_call" + results: + type: array + items: + type: object + properties: + attributes: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + (Optional) Key-value attributes associated with the file + file_id: + type: string + description: >- + Unique identifier of the file containing the result + filename: + type: string + description: Name of the file containing the result + score: + type: number + description: >- + Relevance score for this search result (between 0 and 1) + text: + type: string + description: Text content of the search result + additionalProperties: false + required: + - attributes + - file_id + - filename + - score + - text + title: >- + OpenAIResponseOutputMessageFileSearchToolCallResults + description: >- + Search results returned by the file search operation. + description: >- + (Optional) Search results returned by the file search operation + additionalProperties: false + required: + - id + - queries + - status + - type + title: >- + OpenAIResponseOutputMessageFileSearchToolCall + description: >- + File search tool call output message for OpenAI responses. + "OpenAIResponseOutputMessageFunctionToolCall": + type: object + properties: + call_id: + type: string + description: Unique identifier for the function call + name: + type: string + description: Name of the function being called + arguments: + type: string + description: >- + JSON string containing the function arguments + type: + type: string + const: function_call + default: function_call + description: >- + Tool call type identifier, always "function_call" + id: + type: string + description: >- + (Optional) Additional identifier for the tool call + status: + type: string + description: >- + (Optional) Current status of the function call execution + additionalProperties: false + required: + - call_id + - name + - arguments + - type + title: >- + OpenAIResponseOutputMessageFunctionToolCall + description: >- + Function tool call output message for OpenAI responses. + OpenAIResponseOutputMessageMCPCall: + type: object + properties: + id: + type: string + description: Unique identifier for this MCP call + type: + type: string + const: mcp_call + default: mcp_call + description: >- + Tool call type identifier, always "mcp_call" + arguments: + type: string + description: >- + JSON string containing the MCP call arguments + name: + type: string + description: Name of the MCP method being called + server_label: + type: string + description: >- + Label identifying the MCP server handling the call + error: + type: string + description: >- + (Optional) Error message if the MCP call failed + output: + type: string + description: >- + (Optional) Output result from the successful MCP call + additionalProperties: false + required: + - id + - type + - arguments + - name + - server_label + title: OpenAIResponseOutputMessageMCPCall + description: >- + Model Context Protocol (MCP) call output message for OpenAI responses. + OpenAIResponseOutputMessageMCPListTools: + type: object + properties: + id: + type: string + description: >- + Unique identifier for this MCP list tools operation + type: + type: string + const: mcp_list_tools + default: mcp_list_tools + description: >- + Tool call type identifier, always "mcp_list_tools" + server_label: + type: string + description: >- + Label identifying the MCP server providing the tools + tools: + type: array + items: + type: object + properties: + input_schema: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + JSON schema defining the tool's input parameters + name: + type: string + description: Name of the tool + description: + type: string + description: >- + (Optional) Description of what the tool does + additionalProperties: false + required: + - input_schema + - name + title: MCPListToolsTool + description: >- + Tool definition returned by MCP list tools operation. + description: >- + List of available tools provided by the MCP server + additionalProperties: false + required: + - id + - type + - server_label + - tools + title: OpenAIResponseOutputMessageMCPListTools + description: >- + MCP list tools output message containing available tools from an MCP server. + "OpenAIResponseOutputMessageWebSearchToolCall": + type: object + properties: + id: + type: string + description: Unique identifier for this tool call + status: + type: string + description: >- + Current status of the web search operation + type: + type: string + const: web_search_call + default: web_search_call + description: >- + Tool call type identifier, always "web_search_call" + additionalProperties: false + required: + - id + - status + - type + title: >- + OpenAIResponseOutputMessageWebSearchToolCall + description: >- + Web search tool call output message for OpenAI responses. + CreateConversationRequest: + type: object + properties: + items: + type: array + items: + $ref: '#/components/schemas/ConversationItem' + description: >- + Initial items to include in the conversation context. + metadata: + type: object + additionalProperties: + type: string + description: >- + Set of key-value pairs that can be attached to an object. + additionalProperties: false + title: CreateConversationRequest + Conversation: + type: object + properties: + id: + type: string + object: + type: string + const: conversation + default: conversation + created_at: + type: integer + metadata: + type: object + additionalProperties: + type: string + items: + type: array + items: + type: object + title: dict + description: >- + dict() -> new empty dictionary dict(mapping) -> new dictionary initialized + from a mapping object's (key, value) pairs dict(iterable) -> new + dictionary initialized as if via: d = {} for k, v in iterable: d[k] + = v dict(**kwargs) -> new dictionary initialized with the name=value + pairs in the keyword argument list. For example: dict(one=1, two=2) + additionalProperties: false + required: + - id + - object + - created_at + title: Conversation + description: OpenAI-compatible conversation object. + UpdateConversationRequest: + type: object + properties: + metadata: + type: object + additionalProperties: + type: string + description: >- + Set of key-value pairs that can be attached to an object. + additionalProperties: false + required: + - metadata + title: UpdateConversationRequest + ConversationDeletedResource: + type: object + properties: + id: + type: string + object: + type: string + default: conversation.deleted + deleted: + type: boolean + default: true + additionalProperties: false + required: + - id + - object + - deleted + title: ConversationDeletedResource + description: Response for deleted conversation. + ConversationItemList: + type: object + properties: + object: + type: string + default: list + data: + type: array + items: + $ref: '#/components/schemas/ConversationItem' + first_id: + type: string + last_id: + type: string + has_more: + type: boolean + default: false + additionalProperties: false + required: + - object + - data + - has_more + title: ConversationItemList + description: >- + List of conversation items with pagination. + AddItemsRequest: + type: object + properties: + items: + type: array + items: + $ref: '#/components/schemas/ConversationItem' + description: >- + Items to include in the conversation context. + additionalProperties: false + required: + - items + title: AddItemsRequest + ConversationItemDeletedResource: + type: object + properties: + id: + type: string + object: + type: string + default: conversation.item.deleted + deleted: + type: boolean + default: true + additionalProperties: false + required: + - id + - object + - deleted + title: ConversationItemDeletedResource + description: Response for deleted conversation item. + OpenAIEmbeddingsRequestWithExtraBody: + type: object + properties: + model: + type: string + description: >- + The identifier of the model to use. The model must be an embedding model + registered with Llama Stack and available via the /models endpoint. + input: + oneOf: + - type: string + - type: array + items: + type: string + description: >- + Input text to embed, encoded as a string or array of strings. To embed + multiple inputs in a single request, pass an array of strings. + encoding_format: + type: string + default: float + description: >- + (Optional) The format to return the embeddings in. Can be either "float" + or "base64". Defaults to "float". + dimensions: + type: integer + description: >- + (Optional) The number of dimensions the resulting output embeddings should + have. Only supported in text-embedding-3 and later models. + user: + type: string + description: >- + (Optional) A unique identifier representing your end-user, which can help + OpenAI to monitor and detect abuse. + additionalProperties: false + required: + - model + - input + title: OpenAIEmbeddingsRequestWithExtraBody + description: >- + Request parameters for OpenAI-compatible embeddings endpoint. + OpenAIEmbeddingData: + type: object + properties: + object: + type: string + const: embedding + default: embedding + description: >- + The object type, which will be "embedding" + embedding: + oneOf: + - type: array + items: + type: number + - type: string + description: >- + The embedding vector as a list of floats (when encoding_format="float") + or as a base64-encoded string (when encoding_format="base64") + index: + type: integer + description: >- + The index of the embedding in the input list + additionalProperties: false + required: + - object + - embedding + - index + title: OpenAIEmbeddingData + description: >- + A single embedding data object from an OpenAI-compatible embeddings response. + OpenAIEmbeddingUsage: + type: object + properties: + prompt_tokens: + type: integer + description: The number of tokens in the input + total_tokens: + type: integer + description: The total number of tokens used + additionalProperties: false + required: + - prompt_tokens + - total_tokens + title: OpenAIEmbeddingUsage + description: >- + Usage information for an OpenAI-compatible embeddings response. + OpenAIEmbeddingsResponse: + type: object + properties: + object: + type: string + const: list + default: list + description: The object type, which will be "list" + data: + type: array + items: + $ref: '#/components/schemas/OpenAIEmbeddingData' + description: List of embedding data objects + model: + type: string + description: >- + The model that was used to generate the embeddings + usage: + $ref: '#/components/schemas/OpenAIEmbeddingUsage' + description: Usage information + additionalProperties: false + required: + - object + - data + - model + - usage + title: OpenAIEmbeddingsResponse + description: >- + Response from an OpenAI-compatible embeddings request. + OpenAIFilePurpose: + type: string + enum: + - assistants + - batch + title: OpenAIFilePurpose + description: >- + Valid purpose values for OpenAI Files API. + ListOpenAIFileResponse: + type: object + properties: + data: + type: array + items: + $ref: '#/components/schemas/OpenAIFileObject' + description: List of file objects + has_more: + type: boolean + description: >- + Whether there are more files available beyond this page + first_id: + type: string + description: >- + ID of the first file in the list for pagination + last_id: + type: string + description: >- + ID of the last file in the list for pagination + object: + type: string + const: list + default: list + description: The object type, which is always "list" + additionalProperties: false + required: + - data + - has_more + - first_id + - last_id + - object + title: ListOpenAIFileResponse + description: >- + Response for listing files in OpenAI Files API. + OpenAIFileObject: + type: object + properties: + object: + type: string + const: file + default: file + description: The object type, which is always "file" + id: + type: string + description: >- + The file identifier, which can be referenced in the API endpoints + bytes: + type: integer + description: The size of the file, in bytes + created_at: + type: integer + description: >- + The Unix timestamp (in seconds) for when the file was created + expires_at: + type: integer + description: >- + The Unix timestamp (in seconds) for when the file expires + filename: + type: string + description: The name of the file + purpose: + type: string + enum: + - assistants + - batch + description: The intended purpose of the file + additionalProperties: false + required: + - object + - id + - bytes + - created_at + - expires_at + - filename + - purpose + title: OpenAIFileObject + description: >- + OpenAI File object as defined in the OpenAI Files API. + ExpiresAfter: + type: object + properties: + anchor: + type: string + const: created_at + seconds: + type: integer + additionalProperties: false + required: + - anchor + - seconds + title: ExpiresAfter + description: >- + Control expiration of uploaded files. + + Params: + - anchor, must be "created_at" + - seconds, must be int between 3600 and 2592000 (1 hour to 30 days) + OpenAIFileDeleteResponse: + type: object + properties: + id: + type: string + description: The file identifier that was deleted + object: + type: string + const: file + default: file + description: The object type, which is always "file" + deleted: + type: boolean + description: >- + Whether the file was successfully deleted + additionalProperties: false + required: + - id + - object + - deleted + title: OpenAIFileDeleteResponse + description: >- + Response for deleting a file in OpenAI Files API. + Response: + type: object + title: Response + HealthInfo: + type: object + properties: + status: + type: string + enum: + - OK + - Error + - Not Implemented + description: Current health status of the service + additionalProperties: false + required: + - status + title: HealthInfo + description: >- + Health status information for the service. + RouteInfo: + type: object + properties: + route: + type: string + description: The API endpoint path + method: + type: string + description: HTTP method for the route + provider_types: + type: array + items: + type: string + description: >- + List of provider types that implement this route + additionalProperties: false + required: + - route + - method + - provider_types + title: RouteInfo + description: >- + Information about an API route including its path, method, and implementing + providers. + ListRoutesResponse: + type: object + properties: + data: + type: array + items: + $ref: '#/components/schemas/RouteInfo' + description: >- + List of available route information objects + additionalProperties: false + required: + - data + title: ListRoutesResponse + description: >- + Response containing a list of all available API routes. + Model: + type: object + properties: + identifier: + type: string + description: >- + Unique identifier for this resource in llama stack + provider_resource_id: + type: string + description: >- + Unique identifier for this resource in the provider + provider_id: + type: string + description: >- + ID of the provider that owns this resource + type: + type: string + enum: + - model + - shield + - vector_db + - dataset + - scoring_function + - benchmark + - tool + - tool_group + - prompt + const: model + default: model + description: >- + The resource type, always 'model' for model resources + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: Any additional metadata for this model + model_type: + $ref: '#/components/schemas/ModelType' + default: llm + description: >- + The type of model (LLM or embedding model) + additionalProperties: false + required: + - identifier + - provider_id + - type + - metadata + - model_type + title: Model + description: >- + A model resource representing an AI model registered in Llama Stack. + ModelType: + type: string + enum: + - llm + - embedding + title: ModelType + description: >- + Enumeration of supported model types in Llama Stack. + ListModelsResponse: + type: object + properties: + data: + type: array + items: + $ref: '#/components/schemas/Model' + additionalProperties: false + required: + - data + title: ListModelsResponse + RegisterModelRequest: + type: object + properties: + model_id: + type: string + description: The identifier of the model to register. + provider_model_id: + type: string + description: >- + The identifier of the model in the provider. + provider_id: + type: string + description: The identifier of the provider. + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: Any additional metadata for this model. + model_type: + $ref: '#/components/schemas/ModelType' + description: The type of model to register. + additionalProperties: false + required: + - model_id + title: RegisterModelRequest + RunModerationRequest: + type: object + properties: + input: + oneOf: + - type: string + - type: array + items: + type: string + description: >- + Input (or inputs) to classify. Can be a single string, an array of strings, + or an array of multi-modal input objects similar to other models. + model: + type: string + description: >- + The content moderation model you would like to use. + additionalProperties: false + required: + - input + - model + title: RunModerationRequest + ModerationObject: + type: object + properties: + id: + type: string + description: >- + The unique identifier for the moderation request. + model: + type: string + description: >- + The model used to generate the moderation results. + results: + type: array + items: + $ref: '#/components/schemas/ModerationObjectResults' + description: A list of moderation objects + additionalProperties: false + required: + - id + - model + - results + title: ModerationObject + description: A moderation object. + ModerationObjectResults: + type: object + properties: + flagged: + type: boolean + description: >- + Whether any of the below categories are flagged. + categories: + type: object + additionalProperties: + type: boolean + description: >- + A list of the categories, and whether they are flagged or not. + category_applied_input_types: + type: object + additionalProperties: + type: array + items: + type: string + description: >- + A list of the categories along with the input type(s) that the score applies + to. + category_scores: + type: object + additionalProperties: + type: number + description: >- + A list of the categories along with their scores as predicted by model. + user_message: + type: string + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + additionalProperties: false + required: + - flagged + - metadata + title: ModerationObjectResults + description: A moderation object. + Prompt: + type: object + properties: + prompt: + type: string + description: >- + The system prompt text with variable placeholders. Variables are only + supported when using the Responses API. + version: + type: integer + description: >- + Version (integer starting at 1, incremented on save) + prompt_id: + type: string + description: >- + Unique identifier formatted as 'pmpt_<48-digit-hash>' + variables: + type: array + items: + type: string + description: >- + List of prompt variable names that can be used in the prompt template + is_default: + type: boolean + default: false + description: >- + Boolean indicating whether this version is the default version for this + prompt + additionalProperties: false + required: + - version + - prompt_id + - variables + - is_default + title: Prompt + description: >- + A prompt resource representing a stored OpenAI Compatible prompt template + in Llama Stack. + ListPromptsResponse: + type: object + properties: + data: + type: array + items: + $ref: '#/components/schemas/Prompt' + additionalProperties: false + required: + - data + title: ListPromptsResponse + description: Response model to list prompts. + CreatePromptRequest: + type: object + properties: + prompt: + type: string + description: >- + The prompt text content with variable placeholders. + variables: + type: array + items: + type: string + description: >- + List of variable names that can be used in the prompt template. + additionalProperties: false + required: + - prompt + title: CreatePromptRequest + UpdatePromptRequest: + type: object + properties: + prompt: + type: string + description: The updated prompt text content. + version: + type: integer + description: >- + The current version of the prompt being updated. + variables: + type: array + items: + type: string + description: >- + Updated list of variable names that can be used in the prompt template. + set_as_default: + type: boolean + description: >- + Set the new version as the default (default=True). + additionalProperties: false + required: + - prompt + - version + - set_as_default + title: UpdatePromptRequest + SetDefaultVersionRequest: + type: object + properties: + version: + type: integer + description: The version to set as default. + additionalProperties: false + required: + - version + title: SetDefaultVersionRequest + ProviderInfo: + type: object + properties: + api: + type: string + description: The API name this provider implements + provider_id: + type: string + description: Unique identifier for the provider + provider_type: + type: string + description: The type of provider implementation + config: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + Configuration parameters for the provider + health: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: Current health status of the provider + additionalProperties: false + required: + - api + - provider_id + - provider_type + - config + - health + title: ProviderInfo + description: >- + Information about a registered provider including its configuration and health + status. + ListProvidersResponse: + type: object + properties: + data: + type: array + items: + $ref: '#/components/schemas/ProviderInfo' + description: List of provider information objects + additionalProperties: false + required: + - data + title: ListProvidersResponse + description: >- + Response containing a list of all available providers. + ListOpenAIResponseObject: + type: object + properties: + data: + type: array + items: + $ref: '#/components/schemas/OpenAIResponseObjectWithInput' + description: >- + List of response objects with their input context + has_more: + type: boolean + description: >- + Whether there are more results available beyond this page + first_id: + type: string + description: >- + Identifier of the first item in this page + last_id: + type: string + description: Identifier of the last item in this page + object: + type: string + const: list + default: list + description: Object type identifier, always "list" + additionalProperties: false + required: + - data + - has_more + - first_id + - last_id + - object + title: ListOpenAIResponseObject + description: >- + Paginated list of OpenAI response objects with navigation metadata. + OpenAIResponseError: + type: object + properties: + code: + type: string + description: >- + Error code identifying the type of failure + message: + type: string + description: >- + Human-readable error message describing the failure + additionalProperties: false + required: + - code + - message + title: OpenAIResponseError + description: >- + Error details for failed OpenAI response requests. + OpenAIResponseInput: + oneOf: + - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + - $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalResponse' + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + - $ref: '#/components/schemas/OpenAIResponseMessage' + OpenAIResponseInputToolFileSearch: + type: object + properties: + type: + type: string + const: file_search + default: file_search + description: >- + Tool type identifier, always "file_search" + vector_store_ids: + type: array + items: + type: string + description: >- + List of vector store identifiers to search within + filters: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + (Optional) Additional filters to apply to the search + max_num_results: + type: integer + default: 10 + description: >- + (Optional) Maximum number of search results to return (1-50) + ranking_options: + type: object + properties: + ranker: + type: string + description: >- + (Optional) Name of the ranking algorithm to use + score_threshold: + type: number + default: 0.0 + description: >- + (Optional) Minimum relevance score threshold for results + additionalProperties: false + description: >- + (Optional) Options for ranking and scoring search results + additionalProperties: false + required: + - type + - vector_store_ids + title: OpenAIResponseInputToolFileSearch + description: >- + File search tool configuration for OpenAI response inputs. + OpenAIResponseInputToolFunction: + type: object + properties: + type: + type: string + const: function + default: function + description: Tool type identifier, always "function" + name: + type: string + description: Name of the function that can be called + description: + type: string + description: >- + (Optional) Description of what the function does + parameters: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + (Optional) JSON schema defining the function's parameters + strict: + type: boolean + description: >- + (Optional) Whether to enforce strict parameter validation + additionalProperties: false + required: + - type + - name + title: OpenAIResponseInputToolFunction + description: >- + Function tool configuration for OpenAI response inputs. + OpenAIResponseInputToolWebSearch: + type: object + properties: + type: + oneOf: + - type: string + const: web_search + - type: string + const: web_search_preview + - type: string + const: web_search_preview_2025_03_11 + default: web_search + description: Web search tool type variant to use + search_context_size: + type: string + default: medium + description: >- + (Optional) Size of search context, must be "low", "medium", or "high" + additionalProperties: false + required: + - type + title: OpenAIResponseInputToolWebSearch + description: >- + Web search tool configuration for OpenAI response inputs. + OpenAIResponseObjectWithInput: + type: object + properties: + created_at: + type: integer + description: >- + Unix timestamp when the response was created + error: + $ref: '#/components/schemas/OpenAIResponseError' + description: >- + (Optional) Error details if the response generation failed + id: + type: string + description: Unique identifier for this response + model: + type: string + description: Model identifier used for generation + object: + type: string + const: response + default: response + description: >- + Object type identifier, always "response" + output: + type: array + items: + $ref: '#/components/schemas/OpenAIResponseOutput' + description: >- + List of generated output items (messages, tool calls, etc.) + parallel_tool_calls: + type: boolean + default: false + description: >- + Whether tool calls can be executed in parallel + previous_response_id: + type: string + description: >- + (Optional) ID of the previous response in a conversation + status: + type: string + description: >- + Current status of the response generation + temperature: + type: number + description: >- + (Optional) Sampling temperature used for generation + text: + $ref: '#/components/schemas/OpenAIResponseText' + description: >- + Text formatting configuration for the response + top_p: + type: number + description: >- + (Optional) Nucleus sampling parameter used for generation + tools: + type: array + items: + $ref: '#/components/schemas/OpenAIResponseTool' + description: >- + (Optional) An array of tools the model may call while generating a response. + truncation: + type: string + description: >- + (Optional) Truncation strategy applied to the response + usage: + $ref: '#/components/schemas/OpenAIResponseUsage' + description: >- + (Optional) Token usage information for the response + instructions: + type: string + description: >- + (Optional) System message inserted into the model's context + input: + type: array + items: + $ref: '#/components/schemas/OpenAIResponseInput' + description: >- + List of input items that led to this response + additionalProperties: false + required: + - created_at + - id + - model + - object + - output + - parallel_tool_calls + - status + - text + - input + title: OpenAIResponseObjectWithInput + description: >- + OpenAI response object extended with input context information. + OpenAIResponseOutput: + oneOf: + - $ref: '#/components/schemas/OpenAIResponseMessage' + - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + discriminator: + propertyName: type + mapping: + message: '#/components/schemas/OpenAIResponseMessage' + web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + OpenAIResponseText: + type: object + properties: + format: + type: object + properties: + type: + oneOf: + - type: string + const: text + - type: string + const: json_schema + - type: string + const: json_object + description: >- + Must be "text", "json_schema", or "json_object" to identify the format + type + name: + type: string + description: >- + The name of the response format. Only used for json_schema. + schema: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + The JSON schema the response should conform to. In a Python SDK, this + is often a `pydantic` model. Only used for json_schema. + description: + type: string + description: >- + (Optional) A description of the response format. Only used for json_schema. + strict: + type: boolean + description: >- + (Optional) Whether to strictly enforce the JSON schema. If true, the + response must match the schema exactly. Only used for json_schema. + additionalProperties: false + required: + - type + description: >- + (Optional) Text format configuration specifying output format requirements + additionalProperties: false + title: OpenAIResponseText + description: >- + Text response configuration for OpenAI responses. + OpenAIResponseTool: + oneOf: + - $ref: '#/components/schemas/OpenAIResponseInputToolWebSearch' + - $ref: '#/components/schemas/OpenAIResponseInputToolFileSearch' + - $ref: '#/components/schemas/OpenAIResponseInputToolFunction' + - $ref: '#/components/schemas/OpenAIResponseToolMCP' + discriminator: + propertyName: type + mapping: + web_search: '#/components/schemas/OpenAIResponseInputToolWebSearch' + file_search: '#/components/schemas/OpenAIResponseInputToolFileSearch' + function: '#/components/schemas/OpenAIResponseInputToolFunction' + mcp: '#/components/schemas/OpenAIResponseToolMCP' + OpenAIResponseToolMCP: + type: object + properties: + type: + type: string + const: mcp + default: mcp + description: Tool type identifier, always "mcp" + server_label: + type: string + description: Label to identify this MCP server + allowed_tools: + oneOf: + - type: array + items: + type: string + - type: object + properties: + tool_names: + type: array + items: + type: string + description: >- + (Optional) List of specific tool names that are allowed + additionalProperties: false + title: AllowedToolsFilter + description: >- + Filter configuration for restricting which MCP tools can be used. + description: >- + (Optional) Restriction on which tools can be used from this server + additionalProperties: false + required: + - type + - server_label + title: OpenAIResponseToolMCP + description: >- + Model Context Protocol (MCP) tool configuration for OpenAI response object. + OpenAIResponseUsage: + type: object + properties: + input_tokens: + type: integer + description: Number of tokens in the input + output_tokens: + type: integer + description: Number of tokens in the output + total_tokens: + type: integer + description: Total tokens used (input + output) + input_tokens_details: + type: object + properties: + cached_tokens: + type: integer + description: Number of tokens retrieved from cache + additionalProperties: false + description: Detailed breakdown of input token usage + output_tokens_details: + type: object + properties: + reasoning_tokens: + type: integer + description: >- + Number of tokens used for reasoning (o1/o3 models) + additionalProperties: false + description: Detailed breakdown of output token usage + additionalProperties: false + required: + - input_tokens + - output_tokens + - total_tokens + title: OpenAIResponseUsage + description: Usage information for OpenAI response. + ResponseGuardrailSpec: + type: object + properties: + type: + type: string + description: The type/identifier of the guardrail. + additionalProperties: false + required: + - type + title: ResponseGuardrailSpec + description: >- + Specification for a guardrail to apply during response generation. + OpenAIResponseInputTool: + oneOf: + - $ref: '#/components/schemas/OpenAIResponseInputToolWebSearch' + - $ref: '#/components/schemas/OpenAIResponseInputToolFileSearch' + - $ref: '#/components/schemas/OpenAIResponseInputToolFunction' + - $ref: '#/components/schemas/OpenAIResponseInputToolMCP' + discriminator: + propertyName: type + mapping: + web_search: '#/components/schemas/OpenAIResponseInputToolWebSearch' + file_search: '#/components/schemas/OpenAIResponseInputToolFileSearch' + function: '#/components/schemas/OpenAIResponseInputToolFunction' + mcp: '#/components/schemas/OpenAIResponseInputToolMCP' + OpenAIResponseInputToolMCP: + type: object + properties: + type: + type: string + const: mcp + default: mcp + description: Tool type identifier, always "mcp" + server_label: + type: string + description: Label to identify this MCP server + server_url: + type: string + description: URL endpoint of the MCP server + headers: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + (Optional) HTTP headers to include when connecting to the server + require_approval: + oneOf: + - type: string + const: always + - type: string + const: never + - type: object + properties: + always: + type: array + items: + type: string + description: >- + (Optional) List of tool names that always require approval + never: + type: array + items: + type: string + description: >- + (Optional) List of tool names that never require approval + additionalProperties: false + title: ApprovalFilter + description: >- + Filter configuration for MCP tool approval requirements. + default: never + description: >- + Approval requirement for tool calls ("always", "never", or filter) + allowed_tools: + oneOf: + - type: array + items: + type: string + - type: object + properties: + tool_names: + type: array + items: + type: string + description: >- + (Optional) List of specific tool names that are allowed + additionalProperties: false + title: AllowedToolsFilter + description: >- + Filter configuration for restricting which MCP tools can be used. + description: >- + (Optional) Restriction on which tools can be used from this server + additionalProperties: false + required: + - type + - server_label + - server_url + - require_approval + title: OpenAIResponseInputToolMCP + description: >- + Model Context Protocol (MCP) tool configuration for OpenAI response inputs. + CreateOpenaiResponseRequest: + type: object + properties: + input: + oneOf: + - type: string + - type: array + items: + $ref: '#/components/schemas/OpenAIResponseInput' + description: Input message(s) to create the response. + model: + type: string + description: The underlying LLM used for completions. + instructions: + type: string + previous_response_id: + type: string + description: >- + (Optional) if specified, the new response will be a continuation of the + previous response. This can be used to easily fork-off new responses from + existing responses. + conversation: + type: string + description: >- + (Optional) The ID of a conversation to add the response to. Must begin + with 'conv_'. Input and output messages will be automatically added to + the conversation. + store: + type: boolean + stream: + type: boolean + temperature: + type: number + text: + $ref: '#/components/schemas/OpenAIResponseText' + tools: + type: array + items: + $ref: '#/components/schemas/OpenAIResponseInputTool' + include: + type: array + items: + type: string + description: >- + (Optional) Additional fields to include in the response. + max_infer_iters: + type: integer + additionalProperties: false + required: + - input + - model + title: CreateOpenaiResponseRequest + OpenAIResponseObject: + type: object + properties: + created_at: + type: integer + description: >- + Unix timestamp when the response was created + error: + $ref: '#/components/schemas/OpenAIResponseError' + description: >- + (Optional) Error details if the response generation failed + id: + type: string + description: Unique identifier for this response + model: + type: string + description: Model identifier used for generation + object: + type: string + const: response + default: response + description: >- + Object type identifier, always "response" + output: + type: array + items: + $ref: '#/components/schemas/OpenAIResponseOutput' + description: >- + List of generated output items (messages, tool calls, etc.) + parallel_tool_calls: + type: boolean + default: false + description: >- + Whether tool calls can be executed in parallel + previous_response_id: + type: string + description: >- + (Optional) ID of the previous response in a conversation + status: + type: string + description: >- + Current status of the response generation + temperature: + type: number + description: >- + (Optional) Sampling temperature used for generation + text: + $ref: '#/components/schemas/OpenAIResponseText' + description: >- + Text formatting configuration for the response + top_p: + type: number + description: >- + (Optional) Nucleus sampling parameter used for generation + tools: + type: array + items: + $ref: '#/components/schemas/OpenAIResponseTool' + description: >- + (Optional) An array of tools the model may call while generating a response. + truncation: + type: string + description: >- + (Optional) Truncation strategy applied to the response + usage: + $ref: '#/components/schemas/OpenAIResponseUsage' + description: >- + (Optional) Token usage information for the response + instructions: + type: string + description: >- + (Optional) System message inserted into the model's context + additionalProperties: false + required: + - created_at + - id + - model + - object + - output + - parallel_tool_calls + - status + - text + title: OpenAIResponseObject + description: >- + Complete OpenAI response object containing generation results and metadata. + OpenAIResponseContentPartOutputText: + type: object + properties: + type: + type: string + const: output_text + default: output_text + description: >- + Content part type identifier, always "output_text" + text: + type: string + description: Text emitted for this content part + annotations: + type: array + items: + $ref: '#/components/schemas/OpenAIResponseAnnotations' + description: >- + Structured annotations associated with the text + logprobs: + type: array + items: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: (Optional) Token log probability details + additionalProperties: false + required: + - type + - text + - annotations + title: OpenAIResponseContentPartOutputText + description: >- + Text content within a streamed response part. + "OpenAIResponseContentPartReasoningSummary": + type: object + properties: + type: + type: string + const: summary_text + default: summary_text + description: >- + Content part type identifier, always "summary_text" + text: + type: string + description: Summary text + additionalProperties: false + required: + - type + - text + title: >- + OpenAIResponseContentPartReasoningSummary + description: >- + Reasoning summary part in a streamed response. + OpenAIResponseContentPartReasoningText: + type: object + properties: + type: + type: string + const: reasoning_text + default: reasoning_text + description: >- + Content part type identifier, always "reasoning_text" + text: + type: string + description: Reasoning text supplied by the model + additionalProperties: false + required: + - type + - text + title: OpenAIResponseContentPartReasoningText + description: >- + Reasoning text emitted as part of a streamed response. + OpenAIResponseObjectStream: + oneOf: + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseCreated' + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseInProgress' + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputItemAdded' + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputItemDone' + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextDelta' + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextDone' + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta' + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone' + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallInProgress' + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallSearching' + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallCompleted' + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsInProgress' + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsFailed' + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsCompleted' + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta' + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallArgumentsDone' + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallInProgress' + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallFailed' + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallCompleted' + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseContentPartAdded' + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseContentPartDone' + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningTextDelta' + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningTextDone' + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningSummaryPartAdded' + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningSummaryPartDone' + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningSummaryTextDelta' + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningSummaryTextDone' + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseRefusalDelta' + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseRefusalDone' + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextAnnotationAdded' + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseFileSearchCallInProgress' + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseFileSearchCallSearching' + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseFileSearchCallCompleted' + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseIncomplete' + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseFailed' + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseCompleted' + discriminator: + propertyName: type + mapping: + response.created: '#/components/schemas/OpenAIResponseObjectStreamResponseCreated' + response.in_progress: '#/components/schemas/OpenAIResponseObjectStreamResponseInProgress' + response.output_item.added: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputItemAdded' + response.output_item.done: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputItemDone' + response.output_text.delta: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextDelta' + response.output_text.done: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextDone' + response.function_call_arguments.delta: '#/components/schemas/OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta' + response.function_call_arguments.done: '#/components/schemas/OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone' + response.web_search_call.in_progress: '#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallInProgress' + response.web_search_call.searching: '#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallSearching' + response.web_search_call.completed: '#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallCompleted' + response.mcp_list_tools.in_progress: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsInProgress' + response.mcp_list_tools.failed: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsFailed' + response.mcp_list_tools.completed: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsCompleted' + response.mcp_call.arguments.delta: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta' + response.mcp_call.arguments.done: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallArgumentsDone' + response.mcp_call.in_progress: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallInProgress' + response.mcp_call.failed: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallFailed' + response.mcp_call.completed: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallCompleted' + response.content_part.added: '#/components/schemas/OpenAIResponseObjectStreamResponseContentPartAdded' + response.content_part.done: '#/components/schemas/OpenAIResponseObjectStreamResponseContentPartDone' + response.reasoning_text.delta: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningTextDelta' + response.reasoning_text.done: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningTextDone' + response.reasoning_summary_part.added: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningSummaryPartAdded' + response.reasoning_summary_part.done: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningSummaryPartDone' + response.reasoning_summary_text.delta: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningSummaryTextDelta' + response.reasoning_summary_text.done: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningSummaryTextDone' + response.refusal.delta: '#/components/schemas/OpenAIResponseObjectStreamResponseRefusalDelta' + response.refusal.done: '#/components/schemas/OpenAIResponseObjectStreamResponseRefusalDone' + response.output_text.annotation.added: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextAnnotationAdded' + response.file_search_call.in_progress: '#/components/schemas/OpenAIResponseObjectStreamResponseFileSearchCallInProgress' + response.file_search_call.searching: '#/components/schemas/OpenAIResponseObjectStreamResponseFileSearchCallSearching' + response.file_search_call.completed: '#/components/schemas/OpenAIResponseObjectStreamResponseFileSearchCallCompleted' + response.incomplete: '#/components/schemas/OpenAIResponseObjectStreamResponseIncomplete' + response.failed: '#/components/schemas/OpenAIResponseObjectStreamResponseFailed' + response.completed: '#/components/schemas/OpenAIResponseObjectStreamResponseCompleted' + "OpenAIResponseObjectStreamResponseCompleted": + type: object + properties: + response: + $ref: '#/components/schemas/OpenAIResponseObject' + description: Completed response object + type: + type: string + const: response.completed + default: response.completed + description: >- + Event type identifier, always "response.completed" + additionalProperties: false + required: + - response + - type + title: >- + OpenAIResponseObjectStreamResponseCompleted + description: >- + Streaming event indicating a response has been completed. + "OpenAIResponseObjectStreamResponseContentPartAdded": + type: object + properties: + content_index: + type: integer + description: >- + Index position of the part within the content array + response_id: + type: string + description: >- + Unique identifier of the response containing this content + item_id: + type: string + description: >- + Unique identifier of the output item containing this content part + output_index: + type: integer + description: >- + Index position of the output item in the response + part: + oneOf: + - $ref: '#/components/schemas/OpenAIResponseContentPartOutputText' + - $ref: '#/components/schemas/OpenAIResponseContentPartRefusal' + - $ref: '#/components/schemas/OpenAIResponseContentPartReasoningText' + discriminator: + propertyName: type + mapping: + output_text: '#/components/schemas/OpenAIResponseContentPartOutputText' + refusal: '#/components/schemas/OpenAIResponseContentPartRefusal' + reasoning_text: '#/components/schemas/OpenAIResponseContentPartReasoningText' + description: The content part that was added + sequence_number: + type: integer + description: >- + Sequential number for ordering streaming events + type: + type: string + const: response.content_part.added + default: response.content_part.added + description: >- + Event type identifier, always "response.content_part.added" + additionalProperties: false + required: + - content_index + - response_id + - item_id + - output_index + - part + - sequence_number + - type + title: >- + OpenAIResponseObjectStreamResponseContentPartAdded + description: >- + Streaming event for when a new content part is added to a response item. + "OpenAIResponseObjectStreamResponseContentPartDone": + type: object + properties: + content_index: + type: integer + description: >- + Index position of the part within the content array + response_id: + type: string + description: >- + Unique identifier of the response containing this content + item_id: + type: string + description: >- + Unique identifier of the output item containing this content part + output_index: + type: integer + description: >- + Index position of the output item in the response + part: + oneOf: + - $ref: '#/components/schemas/OpenAIResponseContentPartOutputText' + - $ref: '#/components/schemas/OpenAIResponseContentPartRefusal' + - $ref: '#/components/schemas/OpenAIResponseContentPartReasoningText' + discriminator: + propertyName: type + mapping: + output_text: '#/components/schemas/OpenAIResponseContentPartOutputText' + refusal: '#/components/schemas/OpenAIResponseContentPartRefusal' + reasoning_text: '#/components/schemas/OpenAIResponseContentPartReasoningText' + description: The completed content part + sequence_number: + type: integer + description: >- + Sequential number for ordering streaming events + type: + type: string + const: response.content_part.done + default: response.content_part.done + description: >- + Event type identifier, always "response.content_part.done" + additionalProperties: false + required: + - content_index + - response_id + - item_id + - output_index + - part + - sequence_number + - type + title: >- + OpenAIResponseObjectStreamResponseContentPartDone + description: >- + Streaming event for when a content part is completed. + "OpenAIResponseObjectStreamResponseCreated": + type: object + properties: + response: + $ref: '#/components/schemas/OpenAIResponseObject' + description: The response object that was created + type: + type: string + const: response.created + default: response.created + description: >- + Event type identifier, always "response.created" + additionalProperties: false + required: + - response + - type + title: >- + OpenAIResponseObjectStreamResponseCreated + description: >- + Streaming event indicating a new response has been created. + OpenAIResponseObjectStreamResponseFailed: + type: object + properties: + response: + $ref: '#/components/schemas/OpenAIResponseObject' + description: Response object describing the failure + sequence_number: + type: integer + description: >- + Sequential number for ordering streaming events + type: + type: string + const: response.failed + default: response.failed + description: >- + Event type identifier, always "response.failed" + additionalProperties: false + required: + - response + - sequence_number + - type + title: OpenAIResponseObjectStreamResponseFailed + description: >- + Streaming event emitted when a response fails. + "OpenAIResponseObjectStreamResponseFileSearchCallCompleted": + type: object + properties: + item_id: + type: string + description: >- + Unique identifier of the completed file search call + output_index: + type: integer + description: >- + Index position of the item in the output list + sequence_number: + type: integer + description: >- + Sequential number for ordering streaming events + type: + type: string + const: response.file_search_call.completed + default: response.file_search_call.completed + description: >- + Event type identifier, always "response.file_search_call.completed" + additionalProperties: false + required: + - item_id + - output_index + - sequence_number + - type + title: >- + OpenAIResponseObjectStreamResponseFileSearchCallCompleted + description: >- + Streaming event for completed file search calls. + "OpenAIResponseObjectStreamResponseFileSearchCallInProgress": + type: object + properties: + item_id: + type: string + description: >- + Unique identifier of the file search call + output_index: + type: integer + description: >- + Index position of the item in the output list + sequence_number: + type: integer + description: >- + Sequential number for ordering streaming events + type: + type: string + const: response.file_search_call.in_progress + default: response.file_search_call.in_progress + description: >- + Event type identifier, always "response.file_search_call.in_progress" + additionalProperties: false + required: + - item_id + - output_index + - sequence_number + - type + title: >- + OpenAIResponseObjectStreamResponseFileSearchCallInProgress + description: >- + Streaming event for file search calls in progress. + "OpenAIResponseObjectStreamResponseFileSearchCallSearching": + type: object + properties: + item_id: + type: string + description: >- + Unique identifier of the file search call + output_index: + type: integer + description: >- + Index position of the item in the output list + sequence_number: + type: integer + description: >- + Sequential number for ordering streaming events + type: + type: string + const: response.file_search_call.searching + default: response.file_search_call.searching + description: >- + Event type identifier, always "response.file_search_call.searching" + additionalProperties: false + required: + - item_id + - output_index + - sequence_number + - type + title: >- + OpenAIResponseObjectStreamResponseFileSearchCallSearching + description: >- + Streaming event for file search currently searching. + "OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta": + type: object + properties: + delta: + type: string + description: >- + Incremental function call arguments being added + item_id: + type: string + description: >- + Unique identifier of the function call being updated + output_index: + type: integer + description: >- + Index position of the item in the output list + sequence_number: + type: integer + description: >- + Sequential number for ordering streaming events + type: + type: string + const: response.function_call_arguments.delta + default: response.function_call_arguments.delta + description: >- + Event type identifier, always "response.function_call_arguments.delta" + additionalProperties: false + required: + - delta + - item_id + - output_index + - sequence_number + - type + title: >- + OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta + description: >- + Streaming event for incremental function call argument updates. + "OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone": + type: object + properties: + arguments: + type: string + description: >- + Final complete arguments JSON string for the function call + item_id: + type: string + description: >- + Unique identifier of the completed function call + output_index: + type: integer + description: >- + Index position of the item in the output list + sequence_number: + type: integer + description: >- + Sequential number for ordering streaming events + type: + type: string + const: response.function_call_arguments.done + default: response.function_call_arguments.done + description: >- + Event type identifier, always "response.function_call_arguments.done" + additionalProperties: false + required: + - arguments + - item_id + - output_index + - sequence_number + - type + title: >- + OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone + description: >- + Streaming event for when function call arguments are completed. + "OpenAIResponseObjectStreamResponseInProgress": + type: object + properties: + response: + $ref: '#/components/schemas/OpenAIResponseObject' + description: Current response state while in progress + sequence_number: + type: integer + description: >- + Sequential number for ordering streaming events + type: + type: string + const: response.in_progress + default: response.in_progress + description: >- + Event type identifier, always "response.in_progress" + additionalProperties: false + required: + - response + - sequence_number + - type + title: >- + OpenAIResponseObjectStreamResponseInProgress + description: >- + Streaming event indicating the response remains in progress. + "OpenAIResponseObjectStreamResponseIncomplete": + type: object + properties: + response: + $ref: '#/components/schemas/OpenAIResponseObject' + description: >- + Response object describing the incomplete state + sequence_number: + type: integer + description: >- + Sequential number for ordering streaming events + type: + type: string + const: response.incomplete + default: response.incomplete + description: >- + Event type identifier, always "response.incomplete" + additionalProperties: false + required: + - response + - sequence_number + - type + title: >- + OpenAIResponseObjectStreamResponseIncomplete + description: >- + Streaming event emitted when a response ends in an incomplete state. + "OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta": + type: object + properties: + delta: + type: string + item_id: + type: string + output_index: + type: integer + sequence_number: + type: integer + type: + type: string + const: response.mcp_call.arguments.delta + default: response.mcp_call.arguments.delta + additionalProperties: false + required: + - delta + - item_id + - output_index + - sequence_number + - type + title: >- + OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta + "OpenAIResponseObjectStreamResponseMcpCallArgumentsDone": + type: object + properties: + arguments: + type: string + item_id: + type: string + output_index: + type: integer + sequence_number: + type: integer + type: + type: string + const: response.mcp_call.arguments.done + default: response.mcp_call.arguments.done + additionalProperties: false + required: + - arguments + - item_id + - output_index + - sequence_number + - type + title: >- + OpenAIResponseObjectStreamResponseMcpCallArgumentsDone + "OpenAIResponseObjectStreamResponseMcpCallCompleted": + type: object + properties: + sequence_number: + type: integer + description: >- + Sequential number for ordering streaming events + type: + type: string + const: response.mcp_call.completed + default: response.mcp_call.completed + description: >- + Event type identifier, always "response.mcp_call.completed" + additionalProperties: false + required: + - sequence_number + - type + title: >- + OpenAIResponseObjectStreamResponseMcpCallCompleted + description: Streaming event for completed MCP calls. + "OpenAIResponseObjectStreamResponseMcpCallFailed": + type: object + properties: + sequence_number: + type: integer + description: >- + Sequential number for ordering streaming events + type: + type: string + const: response.mcp_call.failed + default: response.mcp_call.failed + description: >- + Event type identifier, always "response.mcp_call.failed" + additionalProperties: false + required: + - sequence_number + - type + title: >- + OpenAIResponseObjectStreamResponseMcpCallFailed + description: Streaming event for failed MCP calls. + "OpenAIResponseObjectStreamResponseMcpCallInProgress": + type: object + properties: + item_id: + type: string + description: Unique identifier of the MCP call + output_index: + type: integer + description: >- + Index position of the item in the output list + sequence_number: + type: integer + description: >- + Sequential number for ordering streaming events + type: + type: string + const: response.mcp_call.in_progress + default: response.mcp_call.in_progress + description: >- + Event type identifier, always "response.mcp_call.in_progress" + additionalProperties: false + required: + - item_id + - output_index + - sequence_number + - type + title: >- + OpenAIResponseObjectStreamResponseMcpCallInProgress + description: >- + Streaming event for MCP calls in progress. + "OpenAIResponseObjectStreamResponseMcpListToolsCompleted": + type: object + properties: + sequence_number: + type: integer + type: + type: string + const: response.mcp_list_tools.completed + default: response.mcp_list_tools.completed + additionalProperties: false + required: + - sequence_number + - type + title: >- + OpenAIResponseObjectStreamResponseMcpListToolsCompleted + "OpenAIResponseObjectStreamResponseMcpListToolsFailed": + type: object + properties: + sequence_number: + type: integer + type: + type: string + const: response.mcp_list_tools.failed + default: response.mcp_list_tools.failed + additionalProperties: false + required: + - sequence_number + - type + title: >- + OpenAIResponseObjectStreamResponseMcpListToolsFailed + "OpenAIResponseObjectStreamResponseMcpListToolsInProgress": + type: object + properties: + sequence_number: + type: integer + type: + type: string + const: response.mcp_list_tools.in_progress + default: response.mcp_list_tools.in_progress + additionalProperties: false + required: + - sequence_number + - type + title: >- + OpenAIResponseObjectStreamResponseMcpListToolsInProgress + "OpenAIResponseObjectStreamResponseOutputItemAdded": + type: object + properties: + response_id: + type: string + description: >- + Unique identifier of the response containing this output + item: + oneOf: + - $ref: '#/components/schemas/OpenAIResponseMessage' + - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + discriminator: + propertyName: type + mapping: + message: '#/components/schemas/OpenAIResponseMessage' + web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + description: >- + The output item that was added (message, tool call, etc.) + output_index: + type: integer + description: >- + Index position of this item in the output list + sequence_number: + type: integer + description: >- + Sequential number for ordering streaming events + type: + type: string + const: response.output_item.added + default: response.output_item.added + description: >- + Event type identifier, always "response.output_item.added" + additionalProperties: false + required: + - response_id + - item + - output_index + - sequence_number + - type + title: >- + OpenAIResponseObjectStreamResponseOutputItemAdded + description: >- + Streaming event for when a new output item is added to the response. + "OpenAIResponseObjectStreamResponseOutputItemDone": + type: object + properties: + response_id: + type: string + description: >- + Unique identifier of the response containing this output + item: + oneOf: + - $ref: '#/components/schemas/OpenAIResponseMessage' + - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + discriminator: + propertyName: type + mapping: + message: '#/components/schemas/OpenAIResponseMessage' + web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + description: >- + The completed output item (message, tool call, etc.) + output_index: + type: integer + description: >- + Index position of this item in the output list + sequence_number: + type: integer + description: >- + Sequential number for ordering streaming events + type: + type: string + const: response.output_item.done + default: response.output_item.done + description: >- + Event type identifier, always "response.output_item.done" + additionalProperties: false + required: + - response_id + - item + - output_index + - sequence_number + - type + title: >- + OpenAIResponseObjectStreamResponseOutputItemDone + description: >- + Streaming event for when an output item is completed. + "OpenAIResponseObjectStreamResponseOutputTextAnnotationAdded": + type: object + properties: + item_id: + type: string + description: >- + Unique identifier of the item to which the annotation is being added + output_index: + type: integer + description: >- + Index position of the output item in the response's output array + content_index: + type: integer + description: >- + Index position of the content part within the output item + annotation_index: + type: integer + description: >- + Index of the annotation within the content part + annotation: + oneOf: + - $ref: '#/components/schemas/OpenAIResponseAnnotationFileCitation' + - $ref: '#/components/schemas/OpenAIResponseAnnotationCitation' + - $ref: '#/components/schemas/OpenAIResponseAnnotationContainerFileCitation' + - $ref: '#/components/schemas/OpenAIResponseAnnotationFilePath' + discriminator: + propertyName: type + mapping: + file_citation: '#/components/schemas/OpenAIResponseAnnotationFileCitation' + url_citation: '#/components/schemas/OpenAIResponseAnnotationCitation' + container_file_citation: '#/components/schemas/OpenAIResponseAnnotationContainerFileCitation' + file_path: '#/components/schemas/OpenAIResponseAnnotationFilePath' + description: The annotation object being added + sequence_number: + type: integer + description: >- + Sequential number for ordering streaming events + type: + type: string + const: response.output_text.annotation.added + default: response.output_text.annotation.added + description: >- + Event type identifier, always "response.output_text.annotation.added" + additionalProperties: false + required: + - item_id + - output_index + - content_index + - annotation_index + - annotation + - sequence_number + - type + title: >- + OpenAIResponseObjectStreamResponseOutputTextAnnotationAdded + description: >- + Streaming event for when an annotation is added to output text. + "OpenAIResponseObjectStreamResponseOutputTextDelta": + type: object + properties: + content_index: + type: integer + description: Index position within the text content + delta: + type: string + description: Incremental text content being added + item_id: + type: string + description: >- + Unique identifier of the output item being updated + output_index: + type: integer + description: >- + Index position of the item in the output list + sequence_number: + type: integer + description: >- + Sequential number for ordering streaming events + type: + type: string + const: response.output_text.delta + default: response.output_text.delta + description: >- + Event type identifier, always "response.output_text.delta" + additionalProperties: false + required: + - content_index + - delta + - item_id + - output_index + - sequence_number + - type + title: >- + OpenAIResponseObjectStreamResponseOutputTextDelta + description: >- + Streaming event for incremental text content updates. + "OpenAIResponseObjectStreamResponseOutputTextDone": + type: object + properties: + content_index: + type: integer + description: Index position within the text content + text: + type: string + description: >- + Final complete text content of the output item + item_id: + type: string + description: >- + Unique identifier of the completed output item + output_index: + type: integer + description: >- + Index position of the item in the output list + sequence_number: + type: integer + description: >- + Sequential number for ordering streaming events + type: + type: string + const: response.output_text.done + default: response.output_text.done + description: >- + Event type identifier, always "response.output_text.done" + additionalProperties: false + required: + - content_index + - text + - item_id + - output_index + - sequence_number + - type + title: >- + OpenAIResponseObjectStreamResponseOutputTextDone + description: >- + Streaming event for when text output is completed. + "OpenAIResponseObjectStreamResponseReasoningSummaryPartAdded": + type: object + properties: + item_id: + type: string + description: Unique identifier of the output item + output_index: + type: integer + description: Index position of the output item + part: + $ref: '#/components/schemas/OpenAIResponseContentPartReasoningSummary' + description: The summary part that was added + sequence_number: + type: integer + description: >- + Sequential number for ordering streaming events + summary_index: + type: integer + description: >- + Index of the summary part within the reasoning summary + type: + type: string + const: response.reasoning_summary_part.added + default: response.reasoning_summary_part.added + description: >- + Event type identifier, always "response.reasoning_summary_part.added" + additionalProperties: false + required: + - item_id + - output_index + - part + - sequence_number + - summary_index + - type + title: >- + OpenAIResponseObjectStreamResponseReasoningSummaryPartAdded + description: >- + Streaming event for when a new reasoning summary part is added. + "OpenAIResponseObjectStreamResponseReasoningSummaryPartDone": + type: object + properties: + item_id: + type: string + description: Unique identifier of the output item + output_index: + type: integer + description: Index position of the output item + part: + $ref: '#/components/schemas/OpenAIResponseContentPartReasoningSummary' + description: The completed summary part + sequence_number: + type: integer + description: >- + Sequential number for ordering streaming events + summary_index: + type: integer + description: >- + Index of the summary part within the reasoning summary + type: + type: string + const: response.reasoning_summary_part.done + default: response.reasoning_summary_part.done + description: >- + Event type identifier, always "response.reasoning_summary_part.done" + additionalProperties: false + required: + - item_id + - output_index + - part + - sequence_number + - summary_index + - type + title: >- + OpenAIResponseObjectStreamResponseReasoningSummaryPartDone + description: >- + Streaming event for when a reasoning summary part is completed. + "OpenAIResponseObjectStreamResponseReasoningSummaryTextDelta": + type: object + properties: + delta: + type: string + description: Incremental summary text being added + item_id: + type: string + description: Unique identifier of the output item + output_index: + type: integer + description: Index position of the output item + sequence_number: + type: integer + description: >- + Sequential number for ordering streaming events + summary_index: + type: integer + description: >- + Index of the summary part within the reasoning summary + type: + type: string + const: response.reasoning_summary_text.delta + default: response.reasoning_summary_text.delta + description: >- + Event type identifier, always "response.reasoning_summary_text.delta" + additionalProperties: false + required: + - delta + - item_id + - output_index + - sequence_number + - summary_index + - type + title: >- + OpenAIResponseObjectStreamResponseReasoningSummaryTextDelta + description: >- + Streaming event for incremental reasoning summary text updates. + "OpenAIResponseObjectStreamResponseReasoningSummaryTextDone": + type: object + properties: + text: + type: string + description: Final complete summary text + item_id: + type: string + description: Unique identifier of the output item + output_index: + type: integer + description: Index position of the output item + sequence_number: + type: integer + description: >- + Sequential number for ordering streaming events + summary_index: + type: integer + description: >- + Index of the summary part within the reasoning summary + type: + type: string + const: response.reasoning_summary_text.done + default: response.reasoning_summary_text.done + description: >- + Event type identifier, always "response.reasoning_summary_text.done" + additionalProperties: false + required: + - text + - item_id + - output_index + - sequence_number + - summary_index + - type + title: >- + OpenAIResponseObjectStreamResponseReasoningSummaryTextDone + description: >- + Streaming event for when reasoning summary text is completed. + "OpenAIResponseObjectStreamResponseReasoningTextDelta": + type: object + properties: + content_index: + type: integer + description: >- + Index position of the reasoning content part + delta: + type: string + description: Incremental reasoning text being added + item_id: + type: string + description: >- + Unique identifier of the output item being updated + output_index: + type: integer + description: >- + Index position of the item in the output list + sequence_number: + type: integer + description: >- + Sequential number for ordering streaming events + type: + type: string + const: response.reasoning_text.delta + default: response.reasoning_text.delta + description: >- + Event type identifier, always "response.reasoning_text.delta" + additionalProperties: false + required: + - content_index + - delta + - item_id + - output_index + - sequence_number + - type + title: >- + OpenAIResponseObjectStreamResponseReasoningTextDelta + description: >- + Streaming event for incremental reasoning text updates. + "OpenAIResponseObjectStreamResponseReasoningTextDone": + type: object + properties: + content_index: + type: integer + description: >- + Index position of the reasoning content part + text: + type: string + description: Final complete reasoning text + item_id: + type: string + description: >- + Unique identifier of the completed output item + output_index: + type: integer + description: >- + Index position of the item in the output list + sequence_number: + type: integer + description: >- + Sequential number for ordering streaming events + type: + type: string + const: response.reasoning_text.done + default: response.reasoning_text.done + description: >- + Event type identifier, always "response.reasoning_text.done" + additionalProperties: false + required: + - content_index + - text + - item_id + - output_index + - sequence_number + - type + title: >- + OpenAIResponseObjectStreamResponseReasoningTextDone + description: >- + Streaming event for when reasoning text is completed. + "OpenAIResponseObjectStreamResponseRefusalDelta": + type: object + properties: + content_index: + type: integer + description: Index position of the content part + delta: + type: string + description: Incremental refusal text being added + item_id: + type: string + description: Unique identifier of the output item + output_index: + type: integer + description: >- + Index position of the item in the output list + sequence_number: + type: integer + description: >- + Sequential number for ordering streaming events + type: + type: string + const: response.refusal.delta + default: response.refusal.delta + description: >- + Event type identifier, always "response.refusal.delta" + additionalProperties: false + required: + - content_index + - delta + - item_id + - output_index + - sequence_number + - type + title: >- + OpenAIResponseObjectStreamResponseRefusalDelta + description: >- + Streaming event for incremental refusal text updates. + "OpenAIResponseObjectStreamResponseRefusalDone": + type: object + properties: + content_index: + type: integer + description: Index position of the content part + refusal: + type: string + description: Final complete refusal text + item_id: + type: string + description: Unique identifier of the output item + output_index: + type: integer + description: >- + Index position of the item in the output list + sequence_number: + type: integer + description: >- + Sequential number for ordering streaming events + type: + type: string + const: response.refusal.done + default: response.refusal.done + description: >- + Event type identifier, always "response.refusal.done" + additionalProperties: false + required: + - content_index + - refusal + - item_id + - output_index + - sequence_number + - type + title: >- + OpenAIResponseObjectStreamResponseRefusalDone + description: >- + Streaming event for when refusal text is completed. + "OpenAIResponseObjectStreamResponseWebSearchCallCompleted": + type: object + properties: + item_id: + type: string + description: >- + Unique identifier of the completed web search call + output_index: + type: integer + description: >- + Index position of the item in the output list + sequence_number: + type: integer + description: >- + Sequential number for ordering streaming events + type: + type: string + const: response.web_search_call.completed + default: response.web_search_call.completed + description: >- + Event type identifier, always "response.web_search_call.completed" + additionalProperties: false + required: + - item_id + - output_index + - sequence_number + - type + title: >- + OpenAIResponseObjectStreamResponseWebSearchCallCompleted + description: >- + Streaming event for completed web search calls. + "OpenAIResponseObjectStreamResponseWebSearchCallInProgress": + type: object + properties: + item_id: + type: string + description: Unique identifier of the web search call + output_index: + type: integer + description: >- + Index position of the item in the output list + sequence_number: + type: integer + description: >- + Sequential number for ordering streaming events + type: + type: string + const: response.web_search_call.in_progress + default: response.web_search_call.in_progress + description: >- + Event type identifier, always "response.web_search_call.in_progress" + additionalProperties: false + required: + - item_id + - output_index + - sequence_number + - type + title: >- + OpenAIResponseObjectStreamResponseWebSearchCallInProgress + description: >- + Streaming event for web search calls in progress. + "OpenAIResponseObjectStreamResponseWebSearchCallSearching": + type: object + properties: + item_id: + type: string + output_index: + type: integer + sequence_number: + type: integer + type: + type: string + const: response.web_search_call.searching + default: response.web_search_call.searching + additionalProperties: false + required: + - item_id + - output_index + - sequence_number + - type + title: >- + OpenAIResponseObjectStreamResponseWebSearchCallSearching + OpenAIDeleteResponseObject: + type: object + properties: + id: + type: string + description: >- + Unique identifier of the deleted response + object: + type: string + const: response + default: response + description: >- + Object type identifier, always "response" + deleted: + type: boolean + default: true + description: Deletion confirmation flag, always True + additionalProperties: false + required: + - id + - object + - deleted + title: OpenAIDeleteResponseObject + description: >- + Response object confirming deletion of an OpenAI response. + ListOpenAIResponseInputItem: + type: object + properties: + data: + type: array + items: + $ref: '#/components/schemas/OpenAIResponseInput' + description: List of input items + object: + type: string + const: list + default: list + description: Object type identifier, always "list" + additionalProperties: false + required: + - data + - object + title: ListOpenAIResponseInputItem + description: >- + List container for OpenAI response input items. + RunShieldRequest: + type: object + properties: + shield_id: + type: string + description: The identifier of the shield to run. + messages: + type: array + items: + $ref: '#/components/schemas/OpenAIMessageParam' + description: The messages to run the shield on. + params: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: The parameters of the shield. + additionalProperties: false + required: + - shield_id + - messages + - params + title: RunShieldRequest + RunShieldResponse: + type: object + properties: + violation: + $ref: '#/components/schemas/SafetyViolation' + description: >- + (Optional) Safety violation detected by the shield, if any + additionalProperties: false + title: RunShieldResponse + description: Response from running a safety shield. + SafetyViolation: + type: object + properties: + violation_level: + $ref: '#/components/schemas/ViolationLevel' + description: Severity level of the violation + user_message: + type: string + description: >- + (Optional) Message to convey to the user about the violation + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + Additional metadata including specific violation codes for debugging and + telemetry + additionalProperties: false + required: + - violation_level + - metadata + title: SafetyViolation + description: >- + Details of a safety violation detected by content moderation. + ViolationLevel: + type: string + enum: + - info + - warn + - error + title: ViolationLevel + description: Severity level of a safety violation. + AgentTurnInputType: + type: object + properties: + type: + type: string + const: agent_turn_input + default: agent_turn_input + description: >- + Discriminator type. Always "agent_turn_input" + additionalProperties: false + required: + - type + title: AgentTurnInputType + description: Parameter type for agent turn input. + AggregationFunctionType: + type: string + enum: + - average + - weighted_average + - median + - categorical_count + - accuracy + title: AggregationFunctionType + description: >- + Types of aggregation functions for scoring results. + ArrayType: + type: object + properties: + type: + type: string + const: array + default: array + description: Discriminator type. Always "array" + additionalProperties: false + required: + - type + title: ArrayType + description: Parameter type for array values. + BasicScoringFnParams: + type: object + properties: + type: + $ref: '#/components/schemas/ScoringFnParamsType' + const: basic + default: basic + description: >- + The type of scoring function parameters, always basic + aggregation_functions: + type: array + items: + $ref: '#/components/schemas/AggregationFunctionType' + description: >- + Aggregation functions to apply to the scores of each row + additionalProperties: false + required: + - type + - aggregation_functions + title: BasicScoringFnParams + description: >- + Parameters for basic scoring function configuration. + BooleanType: + type: object + properties: + type: + type: string + const: boolean + default: boolean + description: Discriminator type. Always "boolean" + additionalProperties: false + required: + - type + title: BooleanType + description: Parameter type for boolean values. + ChatCompletionInputType: + type: object + properties: + type: + type: string + const: chat_completion_input + default: chat_completion_input + description: >- + Discriminator type. Always "chat_completion_input" + additionalProperties: false + required: + - type + title: ChatCompletionInputType + description: >- + Parameter type for chat completion input. + CompletionInputType: + type: object + properties: + type: + type: string + const: completion_input + default: completion_input + description: >- + Discriminator type. Always "completion_input" + additionalProperties: false + required: + - type + title: CompletionInputType + description: Parameter type for completion input. + JsonType: + type: object + properties: + type: + type: string + const: json + default: json + description: Discriminator type. Always "json" + additionalProperties: false + required: + - type + title: JsonType + description: Parameter type for JSON values. + LLMAsJudgeScoringFnParams: + type: object + properties: + type: + $ref: '#/components/schemas/ScoringFnParamsType' + const: llm_as_judge + default: llm_as_judge + description: >- + The type of scoring function parameters, always llm_as_judge + judge_model: + type: string + description: >- + Identifier of the LLM model to use as a judge for scoring + prompt_template: + type: string + description: >- + (Optional) Custom prompt template for the judge model + judge_score_regexes: + type: array + items: + type: string + description: >- + Regexes to extract the answer from generated response + aggregation_functions: + type: array + items: + $ref: '#/components/schemas/AggregationFunctionType' + description: >- + Aggregation functions to apply to the scores of each row + additionalProperties: false + required: + - type + - judge_model + - judge_score_regexes + - aggregation_functions + title: LLMAsJudgeScoringFnParams + description: >- + Parameters for LLM-as-judge scoring function configuration. + NumberType: + type: object + properties: + type: + type: string + const: number + default: number + description: Discriminator type. Always "number" + additionalProperties: false + required: + - type + title: NumberType + description: Parameter type for numeric values. + ObjectType: + type: object + properties: + type: + type: string + const: object + default: object + description: Discriminator type. Always "object" + additionalProperties: false + required: + - type + title: ObjectType + description: Parameter type for object values. + RegexParserScoringFnParams: + type: object + properties: + type: + $ref: '#/components/schemas/ScoringFnParamsType' + const: regex_parser + default: regex_parser + description: >- + The type of scoring function parameters, always regex_parser + parsing_regexes: + type: array + items: + type: string + description: >- + Regex to extract the answer from generated response + aggregation_functions: + type: array + items: + $ref: '#/components/schemas/AggregationFunctionType' + description: >- + Aggregation functions to apply to the scores of each row + additionalProperties: false + required: + - type + - parsing_regexes + - aggregation_functions + title: RegexParserScoringFnParams + description: >- + Parameters for regex parser scoring function configuration. + ScoringFn: + type: object + properties: + identifier: + type: string + provider_resource_id: + type: string + provider_id: + type: string + type: + type: string + enum: + - model + - shield + - vector_db + - dataset + - scoring_function + - benchmark + - tool + - tool_group + - prompt + const: scoring_function + default: scoring_function + description: >- + The resource type, always scoring_function + description: + type: string + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + return_type: + oneOf: + - $ref: '#/components/schemas/StringType' + - $ref: '#/components/schemas/NumberType' + - $ref: '#/components/schemas/BooleanType' + - $ref: '#/components/schemas/ArrayType' + - $ref: '#/components/schemas/ObjectType' + - $ref: '#/components/schemas/JsonType' + - $ref: '#/components/schemas/UnionType' + - $ref: '#/components/schemas/ChatCompletionInputType' + - $ref: '#/components/schemas/CompletionInputType' + - $ref: '#/components/schemas/AgentTurnInputType' + discriminator: + propertyName: type + mapping: + string: '#/components/schemas/StringType' + number: '#/components/schemas/NumberType' + boolean: '#/components/schemas/BooleanType' + array: '#/components/schemas/ArrayType' + object: '#/components/schemas/ObjectType' + json: '#/components/schemas/JsonType' + union: '#/components/schemas/UnionType' + chat_completion_input: '#/components/schemas/ChatCompletionInputType' + completion_input: '#/components/schemas/CompletionInputType' + agent_turn_input: '#/components/schemas/AgentTurnInputType' + params: + $ref: '#/components/schemas/ScoringFnParams' + additionalProperties: false + required: + - identifier + - provider_id + - type + - metadata + - return_type + title: ScoringFn + description: >- + A scoring function resource for evaluating model outputs. + ScoringFnParams: + oneOf: + - $ref: '#/components/schemas/LLMAsJudgeScoringFnParams' + - $ref: '#/components/schemas/RegexParserScoringFnParams' + - $ref: '#/components/schemas/BasicScoringFnParams' + discriminator: + propertyName: type + mapping: + llm_as_judge: '#/components/schemas/LLMAsJudgeScoringFnParams' + regex_parser: '#/components/schemas/RegexParserScoringFnParams' + basic: '#/components/schemas/BasicScoringFnParams' + ScoringFnParamsType: + type: string + enum: + - llm_as_judge + - regex_parser + - basic + title: ScoringFnParamsType + description: >- + Types of scoring function parameter configurations. + StringType: + type: object + properties: + type: + type: string + const: string + default: string + description: Discriminator type. Always "string" + additionalProperties: false + required: + - type + title: StringType + description: Parameter type for string values. + UnionType: + type: object + properties: + type: + type: string + const: union + default: union + description: Discriminator type. Always "union" + additionalProperties: false + required: + - type + title: UnionType + description: Parameter type for union values. + ListScoringFunctionsResponse: + type: object + properties: + data: + type: array + items: + $ref: '#/components/schemas/ScoringFn' + additionalProperties: false + required: + - data + title: ListScoringFunctionsResponse + ParamType: + oneOf: + - $ref: '#/components/schemas/StringType' + - $ref: '#/components/schemas/NumberType' + - $ref: '#/components/schemas/BooleanType' + - $ref: '#/components/schemas/ArrayType' + - $ref: '#/components/schemas/ObjectType' + - $ref: '#/components/schemas/JsonType' + - $ref: '#/components/schemas/UnionType' + - $ref: '#/components/schemas/ChatCompletionInputType' + - $ref: '#/components/schemas/CompletionInputType' + - $ref: '#/components/schemas/AgentTurnInputType' + discriminator: + propertyName: type + mapping: + string: '#/components/schemas/StringType' + number: '#/components/schemas/NumberType' + boolean: '#/components/schemas/BooleanType' + array: '#/components/schemas/ArrayType' + object: '#/components/schemas/ObjectType' + json: '#/components/schemas/JsonType' + union: '#/components/schemas/UnionType' + chat_completion_input: '#/components/schemas/ChatCompletionInputType' + completion_input: '#/components/schemas/CompletionInputType' + agent_turn_input: '#/components/schemas/AgentTurnInputType' + RegisterScoringFunctionRequest: + type: object + properties: + scoring_fn_id: + type: string + description: >- + The ID of the scoring function to register. + description: + type: string + description: The description of the scoring function. + return_type: + $ref: '#/components/schemas/ParamType' + description: The return type of the scoring function. + provider_scoring_fn_id: + type: string + description: >- + The ID of the provider scoring function to use for the scoring function. + provider_id: + type: string + description: >- + The ID of the provider to use for the scoring function. + params: + $ref: '#/components/schemas/ScoringFnParams' + description: >- + The parameters for the scoring function for benchmark eval, these can + be overridden for app eval. + additionalProperties: false + required: + - scoring_fn_id + - description + - return_type + title: RegisterScoringFunctionRequest + ScoreRequest: + type: object + properties: + input_rows: + type: array + items: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: The rows to score. + scoring_functions: + type: object + additionalProperties: + oneOf: + - $ref: '#/components/schemas/ScoringFnParams' + - type: 'null' + description: >- + The scoring functions to use for the scoring. + additionalProperties: false + required: + - input_rows + - scoring_functions + title: ScoreRequest + ScoreResponse: + type: object + properties: + results: + type: object + additionalProperties: + $ref: '#/components/schemas/ScoringResult' + description: >- + A map of scoring function name to ScoringResult. + additionalProperties: false + required: + - results + title: ScoreResponse + description: The response from scoring. + ScoringResult: + type: object + properties: + score_rows: + type: array + items: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + The scoring result for each row. Each row is a map of column name to value. + aggregated_results: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: Map of metric name to aggregated value + additionalProperties: false + required: + - score_rows + - aggregated_results + title: ScoringResult + description: A scoring result for a single row. + ScoreBatchRequest: + type: object + properties: + dataset_id: + type: string + description: The ID of the dataset to score. + scoring_functions: + type: object + additionalProperties: + oneOf: + - $ref: '#/components/schemas/ScoringFnParams' + - type: 'null' + description: >- + The scoring functions to use for the scoring. + save_results_dataset: + type: boolean + description: >- + Whether to save the results to a dataset. + additionalProperties: false + required: + - dataset_id + - scoring_functions + - save_results_dataset + title: ScoreBatchRequest + ScoreBatchResponse: + type: object + properties: + dataset_id: + type: string + description: >- + (Optional) The identifier of the dataset that was scored + results: + type: object + additionalProperties: + $ref: '#/components/schemas/ScoringResult' + description: >- + A map of scoring function name to ScoringResult + additionalProperties: false + required: + - results + title: ScoreBatchResponse + description: >- + Response from batch scoring operations on datasets. + Shield: + type: object + properties: + identifier: + type: string + provider_resource_id: + type: string + provider_id: + type: string + type: + type: string + enum: + - model + - shield + - vector_db + - dataset + - scoring_function + - benchmark + - tool + - tool_group + - prompt + const: shield + default: shield + description: The resource type, always shield + params: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + (Optional) Configuration parameters for the shield + additionalProperties: false + required: + - identifier + - provider_id + - type + title: Shield + description: >- + A safety shield resource that can be used to check content. + ListShieldsResponse: + type: object + properties: + data: + type: array + items: + $ref: '#/components/schemas/Shield' + additionalProperties: false + required: + - data + title: ListShieldsResponse + RegisterShieldRequest: + type: object + properties: + shield_id: + type: string + description: >- + The identifier of the shield to register. + provider_shield_id: + type: string + description: >- + The identifier of the shield in the provider. + provider_id: + type: string + description: The identifier of the provider. + params: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: The parameters of the shield. + additionalProperties: false + required: + - shield_id + title: RegisterShieldRequest + CompletionMessage: + type: object + properties: + role: + type: string + const: assistant + default: assistant + description: >- + Must be "assistant" to identify this as the model's response + content: + $ref: '#/components/schemas/InterleavedContent' + description: The content of the model's response + stop_reason: + type: string + enum: + - end_of_turn + - end_of_message + - out_of_tokens + description: >- + Reason why the model stopped generating. Options are: - `StopReason.end_of_turn`: + The model finished generating the entire response. - `StopReason.end_of_message`: + The model finished generating but generated a partial response -- usually, + a tool call. The user may call the tool and continue the conversation + with the tool's response. - `StopReason.out_of_tokens`: The model ran + out of token budget. + tool_calls: + type: array + items: + $ref: '#/components/schemas/ToolCall' + description: >- + List of tool calls. Each tool call is a ToolCall object. + additionalProperties: false + required: + - role + - content + - stop_reason + title: CompletionMessage + description: >- + A message containing the model's (assistant) response in a chat conversation. + ImageContentItem: + type: object + properties: + type: + type: string + const: image + default: image + description: >- + Discriminator type of the content item. Always "image" + image: + type: object + properties: + url: + $ref: '#/components/schemas/URL' + description: >- + A URL of the image or data URL in the format of data:image/{type};base64,{data}. + Note that URL could have length limits. + data: + type: string + contentEncoding: base64 + description: base64 encoded image data as string + additionalProperties: false + description: >- + Image as a base64 encoded string or an URL + additionalProperties: false + required: + - type + - image + title: ImageContentItem + description: A image content item + InterleavedContent: + oneOf: + - type: string + - $ref: '#/components/schemas/InterleavedContentItem' + - type: array + items: + $ref: '#/components/schemas/InterleavedContentItem' + InterleavedContentItem: + oneOf: + - $ref: '#/components/schemas/ImageContentItem' + - $ref: '#/components/schemas/TextContentItem' + discriminator: + propertyName: type + mapping: + image: '#/components/schemas/ImageContentItem' + text: '#/components/schemas/TextContentItem' + Message: + oneOf: + - $ref: '#/components/schemas/UserMessage' + - $ref: '#/components/schemas/SystemMessage' + - $ref: '#/components/schemas/ToolResponseMessage' + - $ref: '#/components/schemas/CompletionMessage' + discriminator: + propertyName: role + mapping: + user: '#/components/schemas/UserMessage' + system: '#/components/schemas/SystemMessage' + tool: '#/components/schemas/ToolResponseMessage' + assistant: '#/components/schemas/CompletionMessage' + SystemMessage: + type: object + properties: + role: + type: string + const: system + default: system + description: >- + Must be "system" to identify this as a system message + content: + $ref: '#/components/schemas/InterleavedContent' + description: >- + The content of the "system prompt". If multiple system messages are provided, + they are concatenated. The underlying Llama Stack code may also add other + system messages (for example, for formatting tool definitions). + additionalProperties: false + required: + - role + - content + title: SystemMessage + description: >- + A system message providing instructions or context to the model. + TextContentItem: + type: object + properties: + type: + type: string + const: text + default: text + description: >- + Discriminator type of the content item. Always "text" + text: + type: string + description: Text content + additionalProperties: false + required: + - type + - text + title: TextContentItem + description: A text content item + ToolCall: + type: object + properties: + call_id: + type: string + tool_name: + oneOf: + - type: string + enum: + - brave_search + - wolfram_alpha + - photogen + - code_interpreter + title: BuiltinTool + - type: string + arguments: + type: string + additionalProperties: false + required: + - call_id + - tool_name + - arguments + title: ToolCall + ToolResponseMessage: + type: object + properties: + role: + type: string + const: tool + default: tool + description: >- + Must be "tool" to identify this as a tool response + call_id: + type: string + description: >- + Unique identifier for the tool call this response is for + content: + $ref: '#/components/schemas/InterleavedContent' + description: The response content from the tool + additionalProperties: false + required: + - role + - call_id + - content + title: ToolResponseMessage + description: >- + A message representing the result of a tool invocation. + URL: + type: object + properties: + uri: + type: string + description: The URL string pointing to the resource + additionalProperties: false + required: + - uri + title: URL + description: A URL reference to external content. + UserMessage: + type: object + properties: + role: + type: string + const: user + default: user + description: >- + Must be "user" to identify this as a user message + content: + $ref: '#/components/schemas/InterleavedContent' + description: >- + The content of the message, which can include text and other media + context: + $ref: '#/components/schemas/InterleavedContent' + description: >- + (Optional) This field is used internally by Llama Stack to pass RAG context. + This field may be removed in the API in the future. + additionalProperties: false + required: + - role + - content + title: UserMessage + description: >- + A message from the user in a chat conversation. + SyntheticDataGenerateRequest: + type: object + properties: + dialogs: + type: array + items: + $ref: '#/components/schemas/Message' + description: >- + List of conversation messages to use as input for synthetic data generation + filtering_function: + type: string + enum: + - none + - random + - top_k + - top_p + - top_k_top_p + - sigmoid + description: >- + Type of filtering to apply to generated synthetic data samples + model: + type: string + description: >- + (Optional) The identifier of the model to use. The model must be registered + with Llama Stack and available via the /models endpoint + additionalProperties: false + required: + - dialogs + - filtering_function + title: SyntheticDataGenerateRequest + SyntheticDataGenerationResponse: + type: object + properties: + synthetic_data: + type: array + items: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + List of generated synthetic data samples that passed the filtering criteria + statistics: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + (Optional) Statistical information about the generation process and filtering + results + additionalProperties: false + required: + - synthetic_data + title: SyntheticDataGenerationResponse + description: >- + Response from the synthetic data generation. Batch of (prompt, response, score) + tuples that pass the threshold. + InvokeToolRequest: + type: object + properties: + tool_name: + type: string + description: The name of the tool to invoke. + kwargs: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + A dictionary of arguments to pass to the tool. + additionalProperties: false + required: + - tool_name + - kwargs + title: InvokeToolRequest + ToolInvocationResult: + type: object + properties: + content: + $ref: '#/components/schemas/InterleavedContent' + description: >- + (Optional) The output content from the tool execution + error_message: + type: string + description: >- + (Optional) Error message if the tool execution failed + error_code: + type: integer + description: >- + (Optional) Numeric error code if the tool execution failed + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + (Optional) Additional metadata about the tool execution + additionalProperties: false + title: ToolInvocationResult + description: Result of a tool invocation. + ToolDef: + type: object + properties: + toolgroup_id: + type: string + description: >- + (Optional) ID of the tool group this tool belongs to + name: + type: string + description: Name of the tool + description: + type: string + description: >- + (Optional) Human-readable description of what the tool does + input_schema: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + (Optional) JSON Schema for tool inputs (MCP inputSchema) + output_schema: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + (Optional) JSON Schema for tool outputs (MCP outputSchema) + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + (Optional) Additional metadata about the tool + additionalProperties: false + required: + - name + title: ToolDef + description: >- + Tool definition used in runtime contexts. + ListToolDefsResponse: + type: object + properties: + data: + type: array + items: + $ref: '#/components/schemas/ToolDef' + description: List of tool definitions + additionalProperties: false + required: + - data + title: ListToolDefsResponse + description: >- + Response containing a list of tool definitions. + RAGDocument: + type: object + properties: + document_id: + type: string + description: The unique identifier for the document. + content: + oneOf: + - type: string + - $ref: '#/components/schemas/InterleavedContentItem' + - type: array + items: + $ref: '#/components/schemas/InterleavedContentItem' + - $ref: '#/components/schemas/URL' + description: The content of the document. + mime_type: + type: string + description: The MIME type of the document. + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: Additional metadata for the document. + additionalProperties: false + required: + - document_id + - content + - metadata + title: RAGDocument + description: >- + A document to be used for document ingestion in the RAG Tool. + InsertRequest: + type: object + properties: + documents: + type: array + items: + $ref: '#/components/schemas/RAGDocument' + description: >- + List of documents to index in the RAG system + vector_db_id: + type: string + description: >- + ID of the vector database to store the document embeddings + chunk_size_in_tokens: + type: integer + description: >- + (Optional) Size in tokens for document chunking during indexing + additionalProperties: false + required: + - documents + - vector_db_id + - chunk_size_in_tokens + title: InsertRequest + DefaultRAGQueryGeneratorConfig: + type: object + properties: + type: + type: string + const: default + default: default + description: >- + Type of query generator, always 'default' + separator: + type: string + default: ' ' + description: >- + String separator used to join query terms + additionalProperties: false + required: + - type + - separator + title: DefaultRAGQueryGeneratorConfig + description: >- + Configuration for the default RAG query generator. + LLMRAGQueryGeneratorConfig: + type: object + properties: + type: + type: string + const: llm + default: llm + description: Type of query generator, always 'llm' + model: + type: string + description: >- + Name of the language model to use for query generation + template: + type: string + description: >- + Template string for formatting the query generation prompt + additionalProperties: false + required: + - type + - model + - template + title: LLMRAGQueryGeneratorConfig + description: >- + Configuration for the LLM-based RAG query generator. + RAGQueryConfig: + type: object + properties: + query_generator_config: + oneOf: + - $ref: '#/components/schemas/DefaultRAGQueryGeneratorConfig' + - $ref: '#/components/schemas/LLMRAGQueryGeneratorConfig' + discriminator: + propertyName: type + mapping: + default: '#/components/schemas/DefaultRAGQueryGeneratorConfig' + llm: '#/components/schemas/LLMRAGQueryGeneratorConfig' + description: Configuration for the query generator. + max_tokens_in_context: + type: integer + default: 4096 + description: Maximum number of tokens in the context. + max_chunks: + type: integer + default: 5 + description: Maximum number of chunks to retrieve. + chunk_template: + type: string + default: > + Result {index} + + Content: {chunk.content} + + Metadata: {metadata} + description: >- + Template for formatting each retrieved chunk in the context. Available + placeholders: {index} (1-based chunk ordinal), {chunk.content} (chunk + content string), {metadata} (chunk metadata dict). Default: "Result {index}\nContent: + {chunk.content}\nMetadata: {metadata}\n" + mode: + $ref: '#/components/schemas/RAGSearchMode' + default: vector + description: >- + Search mode for retrieval—either "vector", "keyword", or "hybrid". Default + "vector". + ranker: + $ref: '#/components/schemas/Ranker' + description: >- + Configuration for the ranker to use in hybrid search. Defaults to RRF + ranker. + additionalProperties: false + required: + - query_generator_config + - max_tokens_in_context + - max_chunks + - chunk_template + title: RAGQueryConfig + description: >- + Configuration for the RAG query generation. + RAGSearchMode: + type: string + enum: + - vector + - keyword + - hybrid + title: RAGSearchMode + description: >- + Search modes for RAG query retrieval: - VECTOR: Uses vector similarity search + for semantic matching - KEYWORD: Uses keyword-based search for exact matching + - HYBRID: Combines both vector and keyword search for better results + RRFRanker: + type: object + properties: + type: + type: string + const: rrf + default: rrf + description: The type of ranker, always "rrf" + impact_factor: + type: number + default: 60.0 + description: >- + The impact factor for RRF scoring. Higher values give more weight to higher-ranked + results. Must be greater than 0 + additionalProperties: false + required: + - type + - impact_factor + title: RRFRanker + description: >- + Reciprocal Rank Fusion (RRF) ranker configuration. + Ranker: + oneOf: + - $ref: '#/components/schemas/RRFRanker' + - $ref: '#/components/schemas/WeightedRanker' + discriminator: + propertyName: type + mapping: + rrf: '#/components/schemas/RRFRanker' + weighted: '#/components/schemas/WeightedRanker' + WeightedRanker: + type: object + properties: + type: + type: string + const: weighted + default: weighted + description: The type of ranker, always "weighted" + alpha: + type: number + default: 0.5 + description: >- + Weight factor between 0 and 1. 0 means only use keyword scores, 1 means + only use vector scores, values in between blend both scores. + additionalProperties: false + required: + - type + - alpha + title: WeightedRanker + description: >- + Weighted ranker configuration that combines vector and keyword scores. + QueryRequest: + type: object + properties: + content: + $ref: '#/components/schemas/InterleavedContent' + description: >- + The query content to search for in the indexed documents + vector_db_ids: + type: array + items: + type: string + description: >- + List of vector database IDs to search within + query_config: + $ref: '#/components/schemas/RAGQueryConfig' + description: >- + (Optional) Configuration parameters for the query operation + additionalProperties: false + required: + - content + - vector_db_ids + title: QueryRequest + RAGQueryResult: + type: object + properties: + content: + $ref: '#/components/schemas/InterleavedContent' + description: >- + (Optional) The retrieved content from the query + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + Additional metadata about the query result + additionalProperties: false + required: + - metadata + title: RAGQueryResult + description: >- + Result of a RAG query containing retrieved content and metadata. + ToolGroup: + type: object + properties: + identifier: + type: string + provider_resource_id: + type: string + provider_id: + type: string + type: + type: string + enum: + - model + - shield + - vector_db + - dataset + - scoring_function + - benchmark + - tool + - tool_group + - prompt + const: tool_group + default: tool_group + description: Type of resource, always 'tool_group' + mcp_endpoint: + $ref: '#/components/schemas/URL' + description: >- + (Optional) Model Context Protocol endpoint for remote tools + args: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + (Optional) Additional arguments for the tool group + additionalProperties: false + required: + - identifier + - provider_id + - type + title: ToolGroup + description: >- + A group of related tools managed together. + ListToolGroupsResponse: + type: object + properties: + data: + type: array + items: + $ref: '#/components/schemas/ToolGroup' + description: List of tool groups + additionalProperties: false + required: + - data + title: ListToolGroupsResponse + description: >- + Response containing a list of tool groups. + RegisterToolGroupRequest: + type: object + properties: + toolgroup_id: + type: string + description: The ID of the tool group to register. + provider_id: + type: string + description: >- + The ID of the provider to use for the tool group. + mcp_endpoint: + $ref: '#/components/schemas/URL' + description: >- + The MCP endpoint to use for the tool group. + args: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + A dictionary of arguments to pass to the tool group. + additionalProperties: false + required: + - toolgroup_id + - provider_id + title: RegisterToolGroupRequest + Chunk: + type: object + properties: + content: + $ref: '#/components/schemas/InterleavedContent' + description: >- + The content of the chunk, which can be interleaved text, images, or other + types. + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + Metadata associated with the chunk that will be used in the model context + during inference. + embedding: + type: array + items: + type: number + description: >- + Optional embedding for the chunk. If not provided, it will be computed + later. + stored_chunk_id: + type: string + description: >- + The chunk ID that is stored in the vector database. Used for backend functionality. + chunk_metadata: + $ref: '#/components/schemas/ChunkMetadata' + description: >- + Metadata for the chunk that will NOT be used in the context during inference. + The `chunk_metadata` is required backend functionality. + additionalProperties: false + required: + - content + - metadata + title: Chunk + description: >- + A chunk of content that can be inserted into a vector database. + ChunkMetadata: + type: object + properties: + chunk_id: + type: string + description: >- + The ID of the chunk. If not set, it will be generated based on the document + ID and content. + document_id: + type: string + description: >- + The ID of the document this chunk belongs to. + source: + type: string + description: >- + The source of the content, such as a URL, file path, or other identifier. + created_timestamp: + type: integer + description: >- + An optional timestamp indicating when the chunk was created. + updated_timestamp: + type: integer + description: >- + An optional timestamp indicating when the chunk was last updated. + chunk_window: + type: string + description: >- + The window of the chunk, which can be used to group related chunks together. + chunk_tokenizer: + type: string + description: >- + The tokenizer used to create the chunk. Default is Tiktoken. + chunk_embedding_model: + type: string + description: >- + The embedding model used to create the chunk's embedding. + chunk_embedding_dimension: + type: integer + description: >- + The dimension of the embedding vector for the chunk. + content_token_count: + type: integer + description: >- + The number of tokens in the content of the chunk. + metadata_token_count: + type: integer + description: >- + The number of tokens in the metadata of the chunk. + additionalProperties: false + title: ChunkMetadata + description: >- + `ChunkMetadata` is backend metadata for a `Chunk` that is used to store additional + information about the chunk that will not be used in the context during + inference, but is required for backend functionality. The `ChunkMetadata` is + set during chunk creation in `MemoryToolRuntimeImpl().insert()`and is not + expected to change after. Use `Chunk.metadata` for metadata that will + be used in the context during inference. + InsertChunksRequest: + type: object + properties: + vector_db_id: + type: string + description: >- + The identifier of the vector database to insert the chunks into. + chunks: + type: array + items: + $ref: '#/components/schemas/Chunk' + description: >- + The chunks to insert. Each `Chunk` should contain content which can be + interleaved text, images, or other types. `metadata`: `dict[str, Any]` + and `embedding`: `List[float]` are optional. If `metadata` is provided, + you configure how Llama Stack formats the chunk during generation. If + `embedding` is not provided, it will be computed later. + ttl_seconds: + type: integer + description: The time to live of the chunks. + additionalProperties: false + required: + - vector_db_id + - chunks + title: InsertChunksRequest + QueryChunksRequest: + type: object + properties: + vector_db_id: + type: string + description: >- + The identifier of the vector database to query. + query: + $ref: '#/components/schemas/InterleavedContent' + description: The query to search for. + params: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: The parameters of the query. + additionalProperties: false + required: + - vector_db_id + - query + title: QueryChunksRequest + QueryChunksResponse: + type: object + properties: + chunks: + type: array + items: + $ref: '#/components/schemas/Chunk' + description: >- + List of content chunks returned from the query + scores: + type: array + items: + type: number + description: >- + Relevance scores corresponding to each returned chunk + additionalProperties: false + required: + - chunks + - scores + title: QueryChunksResponse + description: >- + Response from querying chunks in a vector database. + VectorStoreFileCounts: + type: object + properties: + completed: + type: integer + description: >- + Number of files that have been successfully processed + cancelled: + type: integer + description: >- + Number of files that had their processing cancelled + failed: + type: integer + description: Number of files that failed to process + in_progress: + type: integer + description: >- + Number of files currently being processed + total: + type: integer + description: >- + Total number of files in the vector store + additionalProperties: false + required: + - completed + - cancelled + - failed + - in_progress + - total + title: VectorStoreFileCounts + description: >- + File processing status counts for a vector store. + VectorStoreListResponse: + type: object + properties: + object: + type: string + default: list + description: Object type identifier, always "list" + data: + type: array + items: + $ref: '#/components/schemas/VectorStoreObject' + description: List of vector store objects + first_id: + type: string + description: >- + (Optional) ID of the first vector store in the list for pagination + last_id: + type: string + description: >- + (Optional) ID of the last vector store in the list for pagination + has_more: + type: boolean + default: false + description: >- + Whether there are more vector stores available beyond this page + additionalProperties: false + required: + - object + - data + - has_more + title: VectorStoreListResponse + description: Response from listing vector stores. + VectorStoreObject: + type: object + properties: + id: + type: string + description: Unique identifier for the vector store + object: + type: string + default: vector_store + description: >- + Object type identifier, always "vector_store" + created_at: + type: integer + description: >- + Timestamp when the vector store was created + name: + type: string + description: (Optional) Name of the vector store + usage_bytes: + type: integer + default: 0 + description: >- + Storage space used by the vector store in bytes + file_counts: + $ref: '#/components/schemas/VectorStoreFileCounts' + description: >- + File processing status counts for the vector store + status: + type: string + default: completed + description: Current status of the vector store + expires_after: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + (Optional) Expiration policy for the vector store + expires_at: + type: integer + description: >- + (Optional) Timestamp when the vector store will expire + last_active_at: + type: integer + description: >- + (Optional) Timestamp of last activity on the vector store + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + Set of key-value pairs that can be attached to the vector store + additionalProperties: false + required: + - id + - object + - created_at + - usage_bytes + - file_counts + - status + - metadata + title: VectorStoreObject + description: OpenAI Vector Store object. + "OpenAICreateVectorStoreRequestWithExtraBody": + type: object + properties: + name: + type: string + description: (Optional) A name for the vector store + file_ids: + type: array + items: + type: string + description: >- + List of file IDs to include in the vector store + expires_after: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + (Optional) Expiration policy for the vector store + chunking_strategy: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + (Optional) Strategy for splitting files into chunks + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + Set of key-value pairs that can be attached to the vector store + additionalProperties: false + title: >- + OpenAICreateVectorStoreRequestWithExtraBody + description: >- + Request to create a vector store with extra_body support. + OpenaiUpdateVectorStoreRequest: + type: object + properties: + name: + type: string + description: The name of the vector store. + expires_after: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + The expiration policy for a vector store. + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + Set of 16 key-value pairs that can be attached to an object. + additionalProperties: false + title: OpenaiUpdateVectorStoreRequest + VectorStoreDeleteResponse: + type: object + properties: + id: + type: string + description: >- + Unique identifier of the deleted vector store + object: + type: string + default: vector_store.deleted + description: >- + Object type identifier for the deletion response + deleted: + type: boolean + default: true + description: >- + Whether the deletion operation was successful + additionalProperties: false + required: + - id + - object + - deleted + title: VectorStoreDeleteResponse + description: Response from deleting a vector store. + VectorStoreChunkingStrategy: + oneOf: + - $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto' + - $ref: '#/components/schemas/VectorStoreChunkingStrategyStatic' + discriminator: + propertyName: type + mapping: + auto: '#/components/schemas/VectorStoreChunkingStrategyAuto' + static: '#/components/schemas/VectorStoreChunkingStrategyStatic' + VectorStoreChunkingStrategyAuto: + type: object + properties: + type: + type: string + const: auto + default: auto + description: >- + Strategy type, always "auto" for automatic chunking + additionalProperties: false + required: + - type + title: VectorStoreChunkingStrategyAuto + description: >- + Automatic chunking strategy for vector store files. + VectorStoreChunkingStrategyStatic: + type: object + properties: + type: + type: string + const: static + default: static + description: >- + Strategy type, always "static" for static chunking + static: + $ref: '#/components/schemas/VectorStoreChunkingStrategyStaticConfig' + description: >- + Configuration parameters for the static chunking strategy + additionalProperties: false + required: + - type + - static + title: VectorStoreChunkingStrategyStatic + description: >- + Static chunking strategy with configurable parameters. + VectorStoreChunkingStrategyStaticConfig: + type: object + properties: + chunk_overlap_tokens: + type: integer + default: 400 + description: >- + Number of tokens to overlap between adjacent chunks + max_chunk_size_tokens: + type: integer + default: 800 + description: >- + Maximum number of tokens per chunk, must be between 100 and 4096 + additionalProperties: false + required: + - chunk_overlap_tokens + - max_chunk_size_tokens + title: VectorStoreChunkingStrategyStaticConfig + description: >- + Configuration for static chunking strategy. + "OpenAICreateVectorStoreFileBatchRequestWithExtraBody": + type: object + properties: + file_ids: + type: array + items: + type: string + description: >- + A list of File IDs that the vector store should use + attributes: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + (Optional) Key-value attributes to store with the files + chunking_strategy: + $ref: '#/components/schemas/VectorStoreChunkingStrategy' + description: >- + (Optional) The chunking strategy used to chunk the file(s). Defaults to + auto + additionalProperties: false + required: + - file_ids + title: >- + OpenAICreateVectorStoreFileBatchRequestWithExtraBody + description: >- + Request to create a vector store file batch with extra_body support. + VectorStoreFileBatchObject: + type: object + properties: + id: + type: string + description: Unique identifier for the file batch + object: + type: string + default: vector_store.file_batch + description: >- + Object type identifier, always "vector_store.file_batch" + created_at: + type: integer + description: >- + Timestamp when the file batch was created + vector_store_id: + type: string + description: >- + ID of the vector store containing the file batch + status: + $ref: '#/components/schemas/VectorStoreFileStatus' + description: >- + Current processing status of the file batch + file_counts: + $ref: '#/components/schemas/VectorStoreFileCounts' + description: >- + File processing status counts for the batch + additionalProperties: false + required: + - id + - object + - created_at + - vector_store_id + - status + - file_counts + title: VectorStoreFileBatchObject + description: OpenAI Vector Store File Batch object. + VectorStoreFileStatus: + oneOf: + - type: string + const: completed + - type: string + const: in_progress + - type: string + const: cancelled + - type: string + const: failed + VectorStoreFileLastError: + type: object + properties: + code: + oneOf: + - type: string + const: server_error + - type: string + const: rate_limit_exceeded + description: >- + Error code indicating the type of failure + message: + type: string + description: >- + Human-readable error message describing the failure + additionalProperties: false + required: + - code + - message + title: VectorStoreFileLastError + description: >- + Error information for failed vector store file processing. + VectorStoreFileObject: + type: object + properties: + id: + type: string + description: Unique identifier for the file + object: + type: string + default: vector_store.file + description: >- + Object type identifier, always "vector_store.file" + attributes: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + Key-value attributes associated with the file + chunking_strategy: + oneOf: + - $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto' + - $ref: '#/components/schemas/VectorStoreChunkingStrategyStatic' + discriminator: + propertyName: type + mapping: + auto: '#/components/schemas/VectorStoreChunkingStrategyAuto' + static: '#/components/schemas/VectorStoreChunkingStrategyStatic' + description: >- + Strategy used for splitting the file into chunks + created_at: + type: integer + description: >- + Timestamp when the file was added to the vector store + last_error: + $ref: '#/components/schemas/VectorStoreFileLastError' + description: >- + (Optional) Error information if file processing failed + status: + $ref: '#/components/schemas/VectorStoreFileStatus' + description: Current processing status of the file + usage_bytes: + type: integer + default: 0 + description: Storage space used by this file in bytes + vector_store_id: + type: string + description: >- + ID of the vector store containing this file + additionalProperties: false + required: + - id + - object + - attributes + - chunking_strategy + - created_at + - status + - usage_bytes + - vector_store_id + title: VectorStoreFileObject + description: OpenAI Vector Store File object. + VectorStoreFilesListInBatchResponse: + type: object + properties: + object: + type: string + default: list + description: Object type identifier, always "list" + data: + type: array + items: + $ref: '#/components/schemas/VectorStoreFileObject' + description: >- + List of vector store file objects in the batch + first_id: + type: string + description: >- + (Optional) ID of the first file in the list for pagination + last_id: + type: string + description: >- + (Optional) ID of the last file in the list for pagination + has_more: + type: boolean + default: false + description: >- + Whether there are more files available beyond this page + additionalProperties: false + required: + - object + - data + - has_more + title: VectorStoreFilesListInBatchResponse + description: >- + Response from listing files in a vector store file batch. + VectorStoreListFilesResponse: + type: object + properties: + object: + type: string + default: list + description: Object type identifier, always "list" + data: + type: array + items: + $ref: '#/components/schemas/VectorStoreFileObject' + description: List of vector store file objects + first_id: + type: string + description: >- + (Optional) ID of the first file in the list for pagination + last_id: + type: string + description: >- + (Optional) ID of the last file in the list for pagination + has_more: + type: boolean + default: false + description: >- + Whether there are more files available beyond this page + additionalProperties: false + required: + - object + - data + - has_more + title: VectorStoreListFilesResponse + description: >- + Response from listing files in a vector store. + OpenaiAttachFileToVectorStoreRequest: + type: object + properties: + file_id: + type: string + description: >- + The ID of the file to attach to the vector store. + attributes: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + The key-value attributes stored with the file, which can be used for filtering. + chunking_strategy: + $ref: '#/components/schemas/VectorStoreChunkingStrategy' + description: >- + The chunking strategy to use for the file. + additionalProperties: false + required: + - file_id + title: OpenaiAttachFileToVectorStoreRequest + OpenaiUpdateVectorStoreFileRequest: + type: object + properties: + attributes: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + The updated key-value attributes to store with the file. + additionalProperties: false + required: + - attributes + title: OpenaiUpdateVectorStoreFileRequest + VectorStoreFileDeleteResponse: + type: object + properties: + id: + type: string + description: Unique identifier of the deleted file + object: + type: string + default: vector_store.file.deleted + description: >- + Object type identifier for the deletion response + deleted: + type: boolean + default: true + description: >- + Whether the deletion operation was successful + additionalProperties: false + required: + - id + - object + - deleted + title: VectorStoreFileDeleteResponse + description: >- + Response from deleting a vector store file. + VectorStoreContent: + type: object + properties: + type: + type: string + const: text + description: >- + Content type, currently only "text" is supported + text: + type: string + description: The actual text content + additionalProperties: false + required: + - type + - text + title: VectorStoreContent + description: >- + Content item from a vector store file or search result. + VectorStoreFileContentsResponse: + type: object + properties: + file_id: + type: string + description: Unique identifier for the file + filename: + type: string + description: Name of the file + attributes: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + Key-value attributes associated with the file + content: + type: array + items: + $ref: '#/components/schemas/VectorStoreContent' + description: List of content items from the file + additionalProperties: false + required: + - file_id + - filename + - attributes + - content + title: VectorStoreFileContentsResponse + description: >- + Response from retrieving the contents of a vector store file. + OpenaiSearchVectorStoreRequest: + type: object + properties: + query: + oneOf: + - type: string + - type: array + items: + type: string + description: >- + The query string or array for performing the search. + filters: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + Filters based on file attributes to narrow the search results. + max_num_results: + type: integer + description: >- + Maximum number of results to return (1 to 50 inclusive, default 10). + ranking_options: + type: object + properties: + ranker: + type: string + description: >- + (Optional) Name of the ranking algorithm to use + score_threshold: + type: number + default: 0.0 + description: >- + (Optional) Minimum relevance score threshold for results + additionalProperties: false + description: >- + Ranking options for fine-tuning the search results. + rewrite_query: + type: boolean + description: >- + Whether to rewrite the natural language query for vector search (default + false) + search_mode: + type: string + description: >- + The search mode to use - "keyword", "vector", or "hybrid" (default "vector") + additionalProperties: false + required: + - query + title: OpenaiSearchVectorStoreRequest + VectorStoreSearchResponse: + type: object + properties: + file_id: + type: string + description: >- + Unique identifier of the file containing the result + filename: + type: string + description: Name of the file containing the result + score: + type: number + description: Relevance score for this search result + attributes: + type: object + additionalProperties: + oneOf: + - type: string + - type: number + - type: boolean + description: >- + (Optional) Key-value attributes associated with the file + content: + type: array + items: + $ref: '#/components/schemas/VectorStoreContent' + description: >- + List of content items matching the search query + additionalProperties: false + required: + - file_id + - filename + - score + - content + title: VectorStoreSearchResponse + description: Response from searching a vector store. + VectorStoreSearchResponsePage: + type: object + properties: + object: + type: string + default: vector_store.search_results.page + description: >- + Object type identifier for the search results page + search_query: + type: string + description: >- + The original search query that was executed + data: + type: array + items: + $ref: '#/components/schemas/VectorStoreSearchResponse' + description: List of search result objects + has_more: + type: boolean + default: false + description: >- + Whether there are more results available beyond this page + next_page: + type: string + description: >- + (Optional) Token for retrieving the next page of results + additionalProperties: false + required: + - object + - search_query + - data + - has_more + title: VectorStoreSearchResponsePage + description: >- + Paginated response from searching a vector store. + VersionInfo: + type: object + properties: + version: + type: string + description: Version number of the service + additionalProperties: false + required: + - version + title: VersionInfo + description: Version information for the service. + AppendRowsRequest: + type: object + properties: + rows: + type: array + items: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: The rows to append to the dataset. + additionalProperties: false + required: + - rows + title: AppendRowsRequest + PaginatedResponse: + type: object + properties: + data: + type: array + items: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: The list of items for the current page + has_more: + type: boolean + description: >- + Whether there are more items available after this set + url: + type: string + description: The URL for accessing this list + additionalProperties: false + required: + - data + - has_more + title: PaginatedResponse + description: >- + A generic paginated response that follows a simple format. + Dataset: + type: object + properties: + identifier: + type: string + provider_resource_id: + type: string + provider_id: + type: string + type: + type: string + enum: + - model + - shield + - vector_db + - dataset + - scoring_function + - benchmark + - tool + - tool_group + - prompt + const: dataset + default: dataset + description: >- + Type of resource, always 'dataset' for datasets + purpose: + type: string + enum: + - post-training/messages + - eval/question-answer + - eval/messages-answer + description: >- + Purpose of the dataset indicating its intended use + source: + oneOf: + - $ref: '#/components/schemas/URIDataSource' + - $ref: '#/components/schemas/RowsDataSource' + discriminator: + propertyName: type + mapping: + uri: '#/components/schemas/URIDataSource' + rows: '#/components/schemas/RowsDataSource' + description: >- + Data source configuration for the dataset + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: Additional metadata for the dataset + additionalProperties: false + required: + - identifier + - provider_id + - type + - purpose + - source + - metadata + title: Dataset + description: >- + Dataset resource for storing and accessing training or evaluation data. + RowsDataSource: + type: object + properties: + type: + type: string + const: rows + default: rows + rows: + type: array + items: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + The dataset is stored in rows. E.g. - [ {"messages": [{"role": "user", + "content": "Hello, world!"}, {"role": "assistant", "content": "Hello, + world!"}]} ] + additionalProperties: false + required: + - type + - rows + title: RowsDataSource + description: A dataset stored in rows. + URIDataSource: + type: object + properties: + type: + type: string + const: uri + default: uri + uri: + type: string + description: >- + The dataset can be obtained from a URI. E.g. - "https://mywebsite.com/mydata.jsonl" + - "lsfs://mydata.jsonl" - "data:csv;base64,{base64_content}" + additionalProperties: false + required: + - type + - uri + title: URIDataSource + description: >- + A dataset that can be obtained from a URI. + ListDatasetsResponse: + type: object + properties: + data: + type: array + items: + $ref: '#/components/schemas/Dataset' + description: List of datasets + additionalProperties: false + required: + - data + title: ListDatasetsResponse + description: Response from listing datasets. + DataSource: + oneOf: + - $ref: '#/components/schemas/URIDataSource' + - $ref: '#/components/schemas/RowsDataSource' + discriminator: + propertyName: type + mapping: + uri: '#/components/schemas/URIDataSource' + rows: '#/components/schemas/RowsDataSource' + RegisterDatasetRequest: + type: object + properties: + purpose: + type: string + enum: + - post-training/messages + - eval/question-answer + - eval/messages-answer + description: >- + The purpose of the dataset. One of: - "post-training/messages": The dataset + contains a messages column with list of messages for post-training. { + "messages": [ {"role": "user", "content": "Hello, world!"}, {"role": "assistant", + "content": "Hello, world!"}, ] } - "eval/question-answer": The dataset + contains a question column and an answer column for evaluation. { "question": + "What is the capital of France?", "answer": "Paris" } - "eval/messages-answer": + The dataset contains a messages column with list of messages and an answer + column for evaluation. { "messages": [ {"role": "user", "content": "Hello, + my name is John Doe."}, {"role": "assistant", "content": "Hello, John + Doe. How can I help you today?"}, {"role": "user", "content": "What's + my name?"}, ], "answer": "John Doe" } + source: + $ref: '#/components/schemas/DataSource' + description: >- + The data source of the dataset. Ensure that the data source schema is + compatible with the purpose of the dataset. Examples: - { "type": "uri", + "uri": "https://mywebsite.com/mydata.jsonl" } - { "type": "uri", "uri": + "lsfs://mydata.jsonl" } - { "type": "uri", "uri": "data:csv;base64,{base64_content}" + } - { "type": "uri", "uri": "huggingface://llamastack/simpleqa?split=train" + } - { "type": "rows", "rows": [ { "messages": [ {"role": "user", "content": + "Hello, world!"}, {"role": "assistant", "content": "Hello, world!"}, ] + } ] } + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + The metadata for the dataset. - E.g. {"description": "My dataset"}. + dataset_id: + type: string + description: >- + The ID of the dataset. If not provided, an ID will be generated. + additionalProperties: false + required: + - purpose + - source + title: RegisterDatasetRequest + AgentConfig: + type: object + properties: + sampling_params: + $ref: '#/components/schemas/SamplingParams' + input_shields: + type: array + items: + type: string + output_shields: + type: array + items: + type: string + toolgroups: + type: array + items: + $ref: '#/components/schemas/AgentTool' + client_tools: + type: array + items: + $ref: '#/components/schemas/ToolDef' + tool_choice: + type: string + enum: + - auto + - required + - none + title: ToolChoice + description: >- + Whether tool use is required or automatic. This is a hint to the model + which may not be followed. It depends on the Instruction Following capabilities + of the model. + deprecated: true + tool_prompt_format: + type: string + enum: + - json + - function_tag + - python_list + title: ToolPromptFormat + description: >- + Prompt format for calling custom / zero shot tools. + deprecated: true + tool_config: + $ref: '#/components/schemas/ToolConfig' + max_infer_iters: + type: integer + default: 10 + model: + type: string + description: >- + The model identifier to use for the agent + instructions: + type: string + description: The system instructions for the agent + name: + type: string + description: >- + Optional name for the agent, used in telemetry and identification + enable_session_persistence: + type: boolean + default: false + description: >- + Optional flag indicating whether session data has to be persisted + response_format: + $ref: '#/components/schemas/ResponseFormat' + description: Optional response format configuration + additionalProperties: false + required: + - model + - instructions + title: AgentConfig + description: Configuration for an agent. + AgentTool: + oneOf: + - type: string + - type: object + properties: + name: + type: string + args: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + additionalProperties: false + required: + - name + - args + title: AgentToolGroupWithArgs + GrammarResponseFormat: + type: object + properties: + type: + type: string + enum: + - json_schema + - grammar + description: >- + Must be "grammar" to identify this format type + const: grammar + default: grammar + bnf: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + The BNF grammar specification the response should conform to + additionalProperties: false + required: + - type + - bnf + title: GrammarResponseFormat + description: >- + Configuration for grammar-guided response generation. + GreedySamplingStrategy: + type: object + properties: + type: + type: string + const: greedy + default: greedy + description: >- + Must be "greedy" to identify this sampling strategy + additionalProperties: false + required: + - type + title: GreedySamplingStrategy + description: >- + Greedy sampling strategy that selects the highest probability token at each + step. + JsonSchemaResponseFormat: + type: object + properties: + type: + type: string + enum: + - json_schema + - grammar + description: >- + Must be "json_schema" to identify this format type + const: json_schema + default: json_schema + json_schema: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + The JSON schema the response should conform to. In a Python SDK, this + is often a `pydantic` model. + additionalProperties: false + required: + - type + - json_schema + title: JsonSchemaResponseFormat + description: >- + Configuration for JSON schema-guided response generation. + ResponseFormat: + oneOf: + - $ref: '#/components/schemas/JsonSchemaResponseFormat' + - $ref: '#/components/schemas/GrammarResponseFormat' + discriminator: + propertyName: type + mapping: + json_schema: '#/components/schemas/JsonSchemaResponseFormat' + grammar: '#/components/schemas/GrammarResponseFormat' + SamplingParams: + type: object + properties: + strategy: + oneOf: + - $ref: '#/components/schemas/GreedySamplingStrategy' + - $ref: '#/components/schemas/TopPSamplingStrategy' + - $ref: '#/components/schemas/TopKSamplingStrategy' + discriminator: + propertyName: type + mapping: + greedy: '#/components/schemas/GreedySamplingStrategy' + top_p: '#/components/schemas/TopPSamplingStrategy' + top_k: '#/components/schemas/TopKSamplingStrategy' + description: The sampling strategy. + max_tokens: + type: integer + default: 0 + description: >- + The maximum number of tokens that can be generated in the completion. + The token count of your prompt plus max_tokens cannot exceed the model's + context length. + repetition_penalty: + type: number + default: 1.0 + description: >- + Number between -2.0 and 2.0. Positive values penalize new tokens based + on whether they appear in the text so far, increasing the model's likelihood + to talk about new topics. + stop: + type: array + items: + type: string + description: >- + Up to 4 sequences where the API will stop generating further tokens. The + returned text will not contain the stop sequence. + additionalProperties: false + required: + - strategy + title: SamplingParams + description: Sampling parameters. + ToolConfig: + type: object + properties: + tool_choice: + oneOf: + - type: string + enum: + - auto + - required + - none + title: ToolChoice + description: >- + Whether tool use is required or automatic. This is a hint to the model + which may not be followed. It depends on the Instruction Following + capabilities of the model. + - type: string + default: auto + description: >- + (Optional) Whether tool use is automatic, required, or none. Can also + specify a tool name to use a specific tool. Defaults to ToolChoice.auto. + tool_prompt_format: + type: string + enum: + - json + - function_tag + - python_list + description: >- + (Optional) Instructs the model how to format tool calls. By default, Llama + Stack will attempt to use a format that is best adapted to the model. + - `ToolPromptFormat.json`: The tool calls are formatted as a JSON object. + - `ToolPromptFormat.function_tag`: The tool calls are enclosed in a + tag. - `ToolPromptFormat.python_list`: The tool calls are output as Python + syntax -- a list of function calls. + system_message_behavior: + type: string + enum: + - append + - replace + description: >- + (Optional) Config for how to override the default system prompt. - `SystemMessageBehavior.append`: + Appends the provided system message to the default system prompt. - `SystemMessageBehavior.replace`: + Replaces the default system prompt with the provided system message. The + system message can include the string '{{function_definitions}}' to indicate + where the function definitions should be inserted. + default: append + additionalProperties: false + title: ToolConfig + description: Configuration for tool use. + TopKSamplingStrategy: + type: object + properties: + type: + type: string + const: top_k + default: top_k + description: >- + Must be "top_k" to identify this sampling strategy + top_k: + type: integer + description: >- + Number of top tokens to consider for sampling. Must be at least 1 + additionalProperties: false + required: + - type + - top_k + title: TopKSamplingStrategy + description: >- + Top-k sampling strategy that restricts sampling to the k most likely tokens. + TopPSamplingStrategy: + type: object + properties: + type: + type: string + const: top_p + default: top_p + description: >- + Must be "top_p" to identify this sampling strategy + temperature: + type: number + description: >- + Controls randomness in sampling. Higher values increase randomness + top_p: + type: number + default: 0.95 + description: >- + Cumulative probability threshold for nucleus sampling. Defaults to 0.95 + additionalProperties: false + required: + - type + title: TopPSamplingStrategy + description: >- + Top-p (nucleus) sampling strategy that samples from the smallest set of tokens + with cumulative probability >= p. + CreateAgentRequest: + type: object + properties: + agent_config: + $ref: '#/components/schemas/AgentConfig' + description: The configuration for the agent. + additionalProperties: false + required: + - agent_config + title: CreateAgentRequest + AgentCreateResponse: + type: object + properties: + agent_id: + type: string + description: Unique identifier for the created agent + additionalProperties: false + required: + - agent_id + title: AgentCreateResponse + description: >- + Response returned when creating a new agent. + Agent: + type: object + properties: + agent_id: + type: string + description: Unique identifier for the agent + agent_config: + $ref: '#/components/schemas/AgentConfig' + description: Configuration settings for the agent + created_at: + type: string + format: date-time + description: Timestamp when the agent was created + additionalProperties: false + required: + - agent_id + - agent_config + - created_at + title: Agent + description: >- + An agent instance with configuration and metadata. + CreateAgentSessionRequest: + type: object + properties: + session_name: + type: string + description: The name of the session to create. + additionalProperties: false + required: + - session_name + title: CreateAgentSessionRequest + AgentSessionCreateResponse: + type: object + properties: + session_id: + type: string + description: >- + Unique identifier for the created session + additionalProperties: false + required: + - session_id + title: AgentSessionCreateResponse + description: >- + Response returned when creating a new agent session. + InferenceStep: + type: object + properties: + turn_id: + type: string + description: The ID of the turn. + step_id: + type: string + description: The ID of the step. + started_at: + type: string + format: date-time + description: The time the step started. + completed_at: + type: string + format: date-time + description: The time the step completed. + step_type: + type: string + enum: + - inference + - tool_execution + - shield_call + - memory_retrieval + title: StepType + description: Type of the step in an agent turn. + const: inference + default: inference + model_response: + $ref: '#/components/schemas/CompletionMessage' + description: The response from the LLM. + additionalProperties: false + required: + - turn_id + - step_id + - step_type + - model_response + title: InferenceStep + description: An inference step in an agent turn. + MemoryRetrievalStep: + type: object + properties: + turn_id: + type: string + description: The ID of the turn. + step_id: + type: string + description: The ID of the step. + started_at: + type: string + format: date-time + description: The time the step started. + completed_at: + type: string + format: date-time + description: The time the step completed. + step_type: + type: string + enum: + - inference + - tool_execution + - shield_call + - memory_retrieval + title: StepType + description: Type of the step in an agent turn. + const: memory_retrieval + default: memory_retrieval + vector_db_ids: + type: string + description: >- + The IDs of the vector databases to retrieve context from. + inserted_context: + $ref: '#/components/schemas/InterleavedContent' + description: >- + The context retrieved from the vector databases. + additionalProperties: false + required: + - turn_id + - step_id + - step_type + - vector_db_ids + - inserted_context + title: MemoryRetrievalStep + description: >- + A memory retrieval step in an agent turn. + Session: + type: object + properties: + session_id: + type: string + description: >- + Unique identifier for the conversation session + session_name: + type: string + description: Human-readable name for the session + turns: + type: array + items: + $ref: '#/components/schemas/Turn' + description: >- + List of all turns that have occurred in this session + started_at: + type: string + format: date-time + description: Timestamp when the session was created + additionalProperties: false + required: + - session_id + - session_name + - turns + - started_at + title: Session + description: >- + A single session of an interaction with an Agentic System. + ShieldCallStep: + type: object + properties: + turn_id: + type: string + description: The ID of the turn. + step_id: + type: string + description: The ID of the step. + started_at: + type: string + format: date-time + description: The time the step started. + completed_at: + type: string + format: date-time + description: The time the step completed. + step_type: + type: string + enum: + - inference + - tool_execution + - shield_call + - memory_retrieval + title: StepType + description: Type of the step in an agent turn. + const: shield_call + default: shield_call + violation: + $ref: '#/components/schemas/SafetyViolation' + description: The violation from the shield call. + additionalProperties: false + required: + - turn_id + - step_id + - step_type + title: ShieldCallStep + description: A shield call step in an agent turn. + ToolExecutionStep: + type: object + properties: + turn_id: + type: string + description: The ID of the turn. + step_id: + type: string + description: The ID of the step. + started_at: + type: string + format: date-time + description: The time the step started. + completed_at: + type: string + format: date-time + description: The time the step completed. + step_type: + type: string + enum: + - inference + - tool_execution + - shield_call + - memory_retrieval + title: StepType + description: Type of the step in an agent turn. + const: tool_execution + default: tool_execution + tool_calls: + type: array + items: + $ref: '#/components/schemas/ToolCall' + description: The tool calls to execute. + tool_responses: + type: array + items: + $ref: '#/components/schemas/ToolResponse' + description: The tool responses from the tool calls. + additionalProperties: false + required: + - turn_id + - step_id + - step_type + - tool_calls + - tool_responses + title: ToolExecutionStep + description: A tool execution step in an agent turn. + ToolResponse: + type: object + properties: + call_id: + type: string + description: >- + Unique identifier for the tool call this response is for + tool_name: + oneOf: + - type: string + enum: + - brave_search + - wolfram_alpha + - photogen + - code_interpreter + title: BuiltinTool + - type: string + description: Name of the tool that was invoked + content: + $ref: '#/components/schemas/InterleavedContent' + description: The response content from the tool + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + (Optional) Additional metadata about the tool response + additionalProperties: false + required: + - call_id + - tool_name + - content + title: ToolResponse + description: Response from a tool invocation. + Turn: + type: object + properties: + turn_id: + type: string + description: >- + Unique identifier for the turn within a session + session_id: + type: string + description: >- + Unique identifier for the conversation session + input_messages: + type: array + items: + oneOf: + - $ref: '#/components/schemas/UserMessage' + - $ref: '#/components/schemas/ToolResponseMessage' + description: >- + List of messages that initiated this turn + steps: + type: array + items: + oneOf: + - $ref: '#/components/schemas/InferenceStep' + - $ref: '#/components/schemas/ToolExecutionStep' + - $ref: '#/components/schemas/ShieldCallStep' + - $ref: '#/components/schemas/MemoryRetrievalStep' + discriminator: + propertyName: step_type + mapping: + inference: '#/components/schemas/InferenceStep' + tool_execution: '#/components/schemas/ToolExecutionStep' + shield_call: '#/components/schemas/ShieldCallStep' + memory_retrieval: '#/components/schemas/MemoryRetrievalStep' + description: >- + Ordered list of processing steps executed during this turn + output_message: + $ref: '#/components/schemas/CompletionMessage' + description: >- + The model's generated response containing content and metadata + output_attachments: + type: array + items: + type: object + properties: + content: + oneOf: + - type: string + - $ref: '#/components/schemas/InterleavedContentItem' + - type: array + items: + $ref: '#/components/schemas/InterleavedContentItem' + - $ref: '#/components/schemas/URL' + description: The content of the attachment. + mime_type: + type: string + description: The MIME type of the attachment. + additionalProperties: false + required: + - content + - mime_type + title: Attachment + description: An attachment to an agent turn. + description: >- + (Optional) Files or media attached to the agent's response + started_at: + type: string + format: date-time + description: Timestamp when the turn began + completed_at: + type: string + format: date-time + description: >- + (Optional) Timestamp when the turn finished, if completed + additionalProperties: false + required: + - turn_id + - session_id + - input_messages + - steps + - output_message + - started_at + title: Turn + description: >- + A single turn in an interaction with an Agentic System. + CreateAgentTurnRequest: + type: object + properties: + messages: + type: array + items: + oneOf: + - $ref: '#/components/schemas/UserMessage' + - $ref: '#/components/schemas/ToolResponseMessage' + description: List of messages to start the turn with. + stream: + type: boolean + description: >- + (Optional) If True, generate an SSE event stream of the response. Defaults + to False. + documents: + type: array + items: + type: object + properties: + content: + oneOf: + - type: string + - $ref: '#/components/schemas/InterleavedContentItem' + - type: array + items: + $ref: '#/components/schemas/InterleavedContentItem' + - $ref: '#/components/schemas/URL' + description: The content of the document. + mime_type: + type: string + description: The MIME type of the document. + additionalProperties: false + required: + - content + - mime_type + title: Document + description: A document to be used by an agent. + description: >- + (Optional) List of documents to create the turn with. + toolgroups: + type: array + items: + $ref: '#/components/schemas/AgentTool' + description: >- + (Optional) List of toolgroups to create the turn with, will be used in + addition to the agent's config toolgroups for the request. + tool_config: + $ref: '#/components/schemas/ToolConfig' + description: >- + (Optional) The tool configuration to create the turn with, will be used + to override the agent's tool_config. + additionalProperties: false + required: + - messages + title: CreateAgentTurnRequest + AgentTurnResponseEvent: + type: object + properties: + payload: + oneOf: + - $ref: '#/components/schemas/AgentTurnResponseStepStartPayload' + - $ref: '#/components/schemas/AgentTurnResponseStepProgressPayload' + - $ref: '#/components/schemas/AgentTurnResponseStepCompletePayload' + - $ref: '#/components/schemas/AgentTurnResponseTurnStartPayload' + - $ref: '#/components/schemas/AgentTurnResponseTurnCompletePayload' + - $ref: '#/components/schemas/AgentTurnResponseTurnAwaitingInputPayload' + discriminator: + propertyName: event_type + mapping: + step_start: '#/components/schemas/AgentTurnResponseStepStartPayload' + step_progress: '#/components/schemas/AgentTurnResponseStepProgressPayload' + step_complete: '#/components/schemas/AgentTurnResponseStepCompletePayload' + turn_start: '#/components/schemas/AgentTurnResponseTurnStartPayload' + turn_complete: '#/components/schemas/AgentTurnResponseTurnCompletePayload' + turn_awaiting_input: '#/components/schemas/AgentTurnResponseTurnAwaitingInputPayload' + description: >- + Event-specific payload containing event data + additionalProperties: false + required: + - payload + title: AgentTurnResponseEvent + description: >- + An event in an agent turn response stream. + AgentTurnResponseStepCompletePayload: + type: object + properties: + event_type: + type: string + enum: + - step_start + - step_complete + - step_progress + - turn_start + - turn_complete + - turn_awaiting_input + const: step_complete + default: step_complete + description: Type of event being reported + step_type: + type: string + enum: + - inference + - tool_execution + - shield_call + - memory_retrieval + description: Type of step being executed + step_id: + type: string + description: >- + Unique identifier for the step within a turn + step_details: + oneOf: + - $ref: '#/components/schemas/InferenceStep' + - $ref: '#/components/schemas/ToolExecutionStep' + - $ref: '#/components/schemas/ShieldCallStep' + - $ref: '#/components/schemas/MemoryRetrievalStep' + discriminator: + propertyName: step_type + mapping: + inference: '#/components/schemas/InferenceStep' + tool_execution: '#/components/schemas/ToolExecutionStep' + shield_call: '#/components/schemas/ShieldCallStep' + memory_retrieval: '#/components/schemas/MemoryRetrievalStep' + description: Complete details of the executed step + additionalProperties: false + required: + - event_type + - step_type + - step_id + - step_details + title: AgentTurnResponseStepCompletePayload + description: >- + Payload for step completion events in agent turn responses. + AgentTurnResponseStepProgressPayload: + type: object + properties: + event_type: + type: string + enum: + - step_start + - step_complete + - step_progress + - turn_start + - turn_complete + - turn_awaiting_input + const: step_progress + default: step_progress + description: Type of event being reported + step_type: + type: string + enum: + - inference + - tool_execution + - shield_call + - memory_retrieval + description: Type of step being executed + step_id: + type: string + description: >- + Unique identifier for the step within a turn + delta: + oneOf: + - $ref: '#/components/schemas/TextDelta' + - $ref: '#/components/schemas/ImageDelta' + - $ref: '#/components/schemas/ToolCallDelta' + discriminator: + propertyName: type + mapping: + text: '#/components/schemas/TextDelta' + image: '#/components/schemas/ImageDelta' + tool_call: '#/components/schemas/ToolCallDelta' + description: >- + Incremental content changes during step execution + additionalProperties: false + required: + - event_type + - step_type + - step_id + - delta + title: AgentTurnResponseStepProgressPayload + description: >- + Payload for step progress events in agent turn responses. + AgentTurnResponseStepStartPayload: + type: object + properties: + event_type: + type: string + enum: + - step_start + - step_complete + - step_progress + - turn_start + - turn_complete + - turn_awaiting_input + const: step_start + default: step_start + description: Type of event being reported + step_type: + type: string + enum: + - inference + - tool_execution + - shield_call + - memory_retrieval + description: Type of step being executed + step_id: + type: string + description: >- + Unique identifier for the step within a turn + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + (Optional) Additional metadata for the step + additionalProperties: false + required: + - event_type + - step_type + - step_id + title: AgentTurnResponseStepStartPayload + description: >- + Payload for step start events in agent turn responses. + AgentTurnResponseStreamChunk: + type: object + properties: + event: + $ref: '#/components/schemas/AgentTurnResponseEvent' + description: >- + Individual event in the agent turn response stream + additionalProperties: false + required: + - event + title: AgentTurnResponseStreamChunk + description: Streamed agent turn completion response. + "AgentTurnResponseTurnAwaitingInputPayload": + type: object + properties: + event_type: + type: string + enum: + - step_start + - step_complete + - step_progress + - turn_start + - turn_complete + - turn_awaiting_input + const: turn_awaiting_input + default: turn_awaiting_input + description: Type of event being reported + turn: + $ref: '#/components/schemas/Turn' + description: >- + Turn data when waiting for external tool responses + additionalProperties: false + required: + - event_type + - turn + title: >- + AgentTurnResponseTurnAwaitingInputPayload + description: >- + Payload for turn awaiting input events in agent turn responses. + AgentTurnResponseTurnCompletePayload: + type: object + properties: + event_type: + type: string + enum: + - step_start + - step_complete + - step_progress + - turn_start + - turn_complete + - turn_awaiting_input + const: turn_complete + default: turn_complete + description: Type of event being reported + turn: + $ref: '#/components/schemas/Turn' + description: >- + Complete turn data including all steps and results + additionalProperties: false + required: + - event_type + - turn + title: AgentTurnResponseTurnCompletePayload + description: >- + Payload for turn completion events in agent turn responses. + AgentTurnResponseTurnStartPayload: + type: object + properties: + event_type: + type: string + enum: + - step_start + - step_complete + - step_progress + - turn_start + - turn_complete + - turn_awaiting_input + const: turn_start + default: turn_start + description: Type of event being reported + turn_id: + type: string + description: >- + Unique identifier for the turn within a session + additionalProperties: false + required: + - event_type + - turn_id + title: AgentTurnResponseTurnStartPayload + description: >- + Payload for turn start events in agent turn responses. + ImageDelta: + type: object + properties: + type: + type: string + const: image + default: image + description: >- + Discriminator type of the delta. Always "image" + image: + type: string + contentEncoding: base64 + description: The incremental image data as bytes + additionalProperties: false + required: + - type + - image + title: ImageDelta + description: >- + An image content delta for streaming responses. + TextDelta: + type: object + properties: + type: + type: string + const: text + default: text + description: >- + Discriminator type of the delta. Always "text" + text: + type: string + description: The incremental text content + additionalProperties: false + required: + - type + - text + title: TextDelta + description: >- + A text content delta for streaming responses. + ToolCallDelta: + type: object + properties: + type: + type: string + const: tool_call + default: tool_call + description: >- + Discriminator type of the delta. Always "tool_call" + tool_call: + oneOf: + - type: string + - $ref: '#/components/schemas/ToolCall' + description: >- + Either an in-progress tool call string or the final parsed tool call + parse_status: + type: string + enum: + - started + - in_progress + - failed + - succeeded + description: Current parsing status of the tool call + additionalProperties: false + required: + - type + - tool_call + - parse_status + title: ToolCallDelta + description: >- + A tool call content delta for streaming responses. + ResumeAgentTurnRequest: + type: object + properties: + tool_responses: + type: array + items: + $ref: '#/components/schemas/ToolResponse' + description: >- + The tool call responses to resume the turn with. + stream: + type: boolean + description: Whether to stream the response. + additionalProperties: false + required: + - tool_responses + title: ResumeAgentTurnRequest + AgentStepResponse: + type: object + properties: + step: + oneOf: + - $ref: '#/components/schemas/InferenceStep' + - $ref: '#/components/schemas/ToolExecutionStep' + - $ref: '#/components/schemas/ShieldCallStep' + - $ref: '#/components/schemas/MemoryRetrievalStep' + discriminator: + propertyName: step_type + mapping: + inference: '#/components/schemas/InferenceStep' + tool_execution: '#/components/schemas/ToolExecutionStep' + shield_call: '#/components/schemas/ShieldCallStep' + memory_retrieval: '#/components/schemas/MemoryRetrievalStep' + description: >- + The complete step data and execution details + additionalProperties: false + required: + - step + title: AgentStepResponse + description: >- + Response containing details of a specific agent step. + Benchmark: + type: object + properties: + identifier: + type: string + provider_resource_id: + type: string + provider_id: + type: string + type: + type: string + enum: + - model + - shield + - vector_db + - dataset + - scoring_function + - benchmark + - tool + - tool_group + - prompt + const: benchmark + default: benchmark + description: The resource type, always benchmark + dataset_id: + type: string + description: >- + Identifier of the dataset to use for the benchmark evaluation + scoring_functions: + type: array + items: + type: string + description: >- + List of scoring function identifiers to apply during evaluation + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: Metadata for this evaluation task + additionalProperties: false + required: + - identifier + - provider_id + - type + - dataset_id + - scoring_functions + - metadata + title: Benchmark + description: >- + A benchmark resource for evaluating model performance. + ListBenchmarksResponse: + type: object + properties: + data: + type: array + items: + $ref: '#/components/schemas/Benchmark' + additionalProperties: false + required: + - data + title: ListBenchmarksResponse + RegisterBenchmarkRequest: + type: object + properties: + benchmark_id: + type: string + description: The ID of the benchmark to register. + dataset_id: + type: string + description: >- + The ID of the dataset to use for the benchmark. + scoring_functions: + type: array + items: + type: string + description: >- + The scoring functions to use for the benchmark. + provider_benchmark_id: + type: string + description: >- + The ID of the provider benchmark to use for the benchmark. + provider_id: + type: string + description: >- + The ID of the provider to use for the benchmark. + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: The metadata to use for the benchmark. + additionalProperties: false + required: + - benchmark_id + - dataset_id + - scoring_functions + title: RegisterBenchmarkRequest + AgentCandidate: + type: object + properties: + type: + type: string + const: agent + default: agent + config: + $ref: '#/components/schemas/AgentConfig' + description: >- + The configuration for the agent candidate. + additionalProperties: false + required: + - type + - config + title: AgentCandidate + description: An agent candidate for evaluation. + BenchmarkConfig: + type: object + properties: + eval_candidate: + oneOf: + - $ref: '#/components/schemas/ModelCandidate' + - $ref: '#/components/schemas/AgentCandidate' + discriminator: + propertyName: type + mapping: + model: '#/components/schemas/ModelCandidate' + agent: '#/components/schemas/AgentCandidate' + description: The candidate to evaluate. + scoring_params: + type: object + additionalProperties: + $ref: '#/components/schemas/ScoringFnParams' + description: >- + Map between scoring function id and parameters for each scoring function + you want to run + num_examples: + type: integer + description: >- + (Optional) The number of examples to evaluate. If not provided, all examples + in the dataset will be evaluated + additionalProperties: false + required: + - eval_candidate + - scoring_params + title: BenchmarkConfig + description: >- + A benchmark configuration for evaluation. + ModelCandidate: + type: object + properties: + type: + type: string + const: model + default: model + model: + type: string + description: The model ID to evaluate. + sampling_params: + $ref: '#/components/schemas/SamplingParams' + description: The sampling parameters for the model. + system_message: + $ref: '#/components/schemas/SystemMessage' + description: >- + (Optional) The system message providing instructions or context to the + model. + additionalProperties: false + required: + - type + - model + - sampling_params + title: ModelCandidate + description: A model candidate for evaluation. + EvaluateRowsRequest: + type: object + properties: + input_rows: + type: array + items: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: The rows to evaluate. + scoring_functions: + type: array + items: + type: string + description: >- + The scoring functions to use for the evaluation. + benchmark_config: + $ref: '#/components/schemas/BenchmarkConfig' + description: The configuration for the benchmark. + additionalProperties: false + required: + - input_rows + - scoring_functions + - benchmark_config + title: EvaluateRowsRequest + EvaluateResponse: + type: object + properties: + generations: + type: array + items: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: The generations from the evaluation. + scores: + type: object + additionalProperties: + $ref: '#/components/schemas/ScoringResult' + description: The scores from the evaluation. + additionalProperties: false + required: + - generations + - scores + title: EvaluateResponse + description: The response from an evaluation. + RunEvalRequest: + type: object + properties: + benchmark_config: + $ref: '#/components/schemas/BenchmarkConfig' + description: The configuration for the benchmark. + additionalProperties: false + required: + - benchmark_config + title: RunEvalRequest + Job: + type: object + properties: + job_id: + type: string + description: Unique identifier for the job + status: + type: string + enum: + - completed + - in_progress + - failed + - scheduled + - cancelled + description: Current execution status of the job + additionalProperties: false + required: + - job_id + - status + title: Job + description: >- + A job execution instance with status tracking. + RerankRequest: + type: object + properties: + model: + type: string + description: >- + The identifier of the reranking model to use. + query: + oneOf: + - type: string + - $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + - $ref: '#/components/schemas/OpenAIChatCompletionContentPartImageParam' + description: >- + The search query to rank items against. Can be a string, text content + part, or image content part. The input must not exceed the model's max + input token length. + items: + type: array + items: + oneOf: + - type: string + - $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + - $ref: '#/components/schemas/OpenAIChatCompletionContentPartImageParam' + description: >- + List of items to rerank. Each item can be a string, text content part, + or image content part. Each input must not exceed the model's max input + token length. + max_num_results: + type: integer + description: >- + (Optional) Maximum number of results to return. Default: returns all. + additionalProperties: false + required: + - model + - query + - items + title: RerankRequest + RerankData: + type: object + properties: + index: + type: integer + description: >- + The original index of the document in the input list + relevance_score: + type: number + description: >- + The relevance score from the model output. Values are inverted when applicable + so that higher scores indicate greater relevance. + additionalProperties: false + required: + - index + - relevance_score + title: RerankData + description: >- + A single rerank result from a reranking response. + RerankResponse: + type: object + properties: + data: + type: array + items: + $ref: '#/components/schemas/RerankData' + description: >- + List of rerank result objects, sorted by relevance score (descending) + additionalProperties: false + required: + - data + title: RerankResponse + description: Response from a reranking request. + Checkpoint: + type: object + properties: + identifier: + type: string + description: Unique identifier for the checkpoint + created_at: + type: string + format: date-time + description: >- + Timestamp when the checkpoint was created + epoch: + type: integer + description: >- + Training epoch when the checkpoint was saved + post_training_job_id: + type: string + description: >- + Identifier of the training job that created this checkpoint + path: + type: string + description: >- + File system path where the checkpoint is stored + training_metrics: + $ref: '#/components/schemas/PostTrainingMetric' + description: >- + (Optional) Training metrics associated with this checkpoint + additionalProperties: false + required: + - identifier + - created_at + - epoch + - post_training_job_id + - path + title: Checkpoint + description: Checkpoint created during training runs. + PostTrainingJobArtifactsResponse: + type: object + properties: + job_uuid: + type: string + description: Unique identifier for the training job + checkpoints: + type: array + items: + $ref: '#/components/schemas/Checkpoint' + description: >- + List of model checkpoints created during training + additionalProperties: false + required: + - job_uuid + - checkpoints + title: PostTrainingJobArtifactsResponse + description: Artifacts of a finetuning job. + PostTrainingMetric: + type: object + properties: + epoch: + type: integer + description: Training epoch number + train_loss: + type: number + description: Loss value on the training dataset + validation_loss: + type: number + description: Loss value on the validation dataset + perplexity: + type: number + description: >- + Perplexity metric indicating model confidence + additionalProperties: false + required: + - epoch + - train_loss + - validation_loss + - perplexity + title: PostTrainingMetric + description: >- + Training metrics captured during post-training jobs. + CancelTrainingJobRequest: + type: object + properties: + job_uuid: + type: string + description: The UUID of the job to cancel. + additionalProperties: false + required: + - job_uuid + title: CancelTrainingJobRequest + PostTrainingJobStatusResponse: + type: object + properties: + job_uuid: + type: string + description: Unique identifier for the training job + status: + type: string + enum: + - completed + - in_progress + - failed + - scheduled + - cancelled + description: Current status of the training job + scheduled_at: + type: string + format: date-time + description: >- + (Optional) Timestamp when the job was scheduled + started_at: + type: string + format: date-time + description: >- + (Optional) Timestamp when the job execution began + completed_at: + type: string + format: date-time + description: >- + (Optional) Timestamp when the job finished, if completed + resources_allocated: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + (Optional) Information about computational resources allocated to the + job + checkpoints: + type: array + items: + $ref: '#/components/schemas/Checkpoint' + description: >- + List of model checkpoints created during training + additionalProperties: false + required: + - job_uuid + - status + - checkpoints + title: PostTrainingJobStatusResponse + description: Status of a finetuning job. + ListPostTrainingJobsResponse: + type: object + properties: + data: + type: array + items: + type: object + properties: + job_uuid: + type: string + additionalProperties: false + required: + - job_uuid + title: PostTrainingJob + additionalProperties: false + required: + - data + title: ListPostTrainingJobsResponse + DPOAlignmentConfig: + type: object + properties: + beta: + type: number + description: Temperature parameter for the DPO loss + loss_type: + $ref: '#/components/schemas/DPOLossType' + default: sigmoid + description: The type of loss function to use for DPO + additionalProperties: false + required: + - beta + - loss_type + title: DPOAlignmentConfig + description: >- + Configuration for Direct Preference Optimization (DPO) alignment. + DPOLossType: + type: string + enum: + - sigmoid + - hinge + - ipo + - kto_pair + title: DPOLossType + DataConfig: + type: object + properties: + dataset_id: + type: string + description: >- + Unique identifier for the training dataset + batch_size: + type: integer + description: Number of samples per training batch + shuffle: + type: boolean + description: >- + Whether to shuffle the dataset during training + data_format: + $ref: '#/components/schemas/DatasetFormat' + description: >- + Format of the dataset (instruct or dialog) + validation_dataset_id: + type: string + description: >- + (Optional) Unique identifier for the validation dataset + packed: + type: boolean + default: false + description: >- + (Optional) Whether to pack multiple samples into a single sequence for + efficiency + train_on_input: + type: boolean + default: false + description: >- + (Optional) Whether to compute loss on input tokens as well as output tokens + additionalProperties: false + required: + - dataset_id + - batch_size + - shuffle + - data_format + title: DataConfig + description: >- + Configuration for training data and data loading. + DatasetFormat: + type: string + enum: + - instruct + - dialog + title: DatasetFormat + description: Format of the training dataset. + EfficiencyConfig: + type: object + properties: + enable_activation_checkpointing: + type: boolean + default: false + description: >- + (Optional) Whether to use activation checkpointing to reduce memory usage + enable_activation_offloading: + type: boolean + default: false + description: >- + (Optional) Whether to offload activations to CPU to save GPU memory + memory_efficient_fsdp_wrap: + type: boolean + default: false + description: >- + (Optional) Whether to use memory-efficient FSDP wrapping + fsdp_cpu_offload: + type: boolean + default: false + description: >- + (Optional) Whether to offload FSDP parameters to CPU + additionalProperties: false + title: EfficiencyConfig + description: >- + Configuration for memory and compute efficiency optimizations. + OptimizerConfig: + type: object + properties: + optimizer_type: + $ref: '#/components/schemas/OptimizerType' + description: >- + Type of optimizer to use (adam, adamw, or sgd) + lr: + type: number + description: Learning rate for the optimizer + weight_decay: + type: number + description: >- + Weight decay coefficient for regularization + num_warmup_steps: + type: integer + description: Number of steps for learning rate warmup + additionalProperties: false + required: + - optimizer_type + - lr + - weight_decay + - num_warmup_steps + title: OptimizerConfig + description: >- + Configuration parameters for the optimization algorithm. + OptimizerType: + type: string + enum: + - adam + - adamw + - sgd + title: OptimizerType + description: >- + Available optimizer algorithms for training. + TrainingConfig: + type: object + properties: + n_epochs: + type: integer + description: Number of training epochs to run + max_steps_per_epoch: + type: integer + default: 1 + description: Maximum number of steps to run per epoch + gradient_accumulation_steps: + type: integer + default: 1 + description: >- + Number of steps to accumulate gradients before updating + max_validation_steps: + type: integer + default: 1 + description: >- + (Optional) Maximum number of validation steps per epoch + data_config: + $ref: '#/components/schemas/DataConfig' + description: >- + (Optional) Configuration for data loading and formatting + optimizer_config: + $ref: '#/components/schemas/OptimizerConfig' + description: >- + (Optional) Configuration for the optimization algorithm + efficiency_config: + $ref: '#/components/schemas/EfficiencyConfig' + description: >- + (Optional) Configuration for memory and compute optimizations + dtype: + type: string + default: bf16 + description: >- + (Optional) Data type for model parameters (bf16, fp16, fp32) + additionalProperties: false + required: + - n_epochs + - max_steps_per_epoch + - gradient_accumulation_steps + title: TrainingConfig + description: >- + Comprehensive configuration for the training process. + PreferenceOptimizeRequest: + type: object + properties: + job_uuid: + type: string + description: The UUID of the job to create. + finetuned_model: + type: string + description: The model to fine-tune. + algorithm_config: + $ref: '#/components/schemas/DPOAlignmentConfig' + description: The algorithm configuration. + training_config: + $ref: '#/components/schemas/TrainingConfig' + description: The training configuration. + hyperparam_search_config: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: The hyperparam search configuration. + logger_config: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: The logger configuration. + additionalProperties: false + required: + - job_uuid + - finetuned_model + - algorithm_config + - training_config + - hyperparam_search_config + - logger_config + title: PreferenceOptimizeRequest + PostTrainingJob: + type: object + properties: + job_uuid: + type: string + additionalProperties: false + required: + - job_uuid + title: PostTrainingJob + AlgorithmConfig: + oneOf: + - $ref: '#/components/schemas/LoraFinetuningConfig' + - $ref: '#/components/schemas/QATFinetuningConfig' + discriminator: + propertyName: type + mapping: + LoRA: '#/components/schemas/LoraFinetuningConfig' + QAT: '#/components/schemas/QATFinetuningConfig' + LoraFinetuningConfig: + type: object + properties: + type: + type: string + const: LoRA + default: LoRA + description: Algorithm type identifier, always "LoRA" + lora_attn_modules: + type: array + items: + type: string + description: >- + List of attention module names to apply LoRA to + apply_lora_to_mlp: + type: boolean + description: Whether to apply LoRA to MLP layers + apply_lora_to_output: + type: boolean + description: >- + Whether to apply LoRA to output projection layers + rank: + type: integer + description: >- + Rank of the LoRA adaptation (lower rank = fewer parameters) + alpha: + type: integer + description: >- + LoRA scaling parameter that controls adaptation strength + use_dora: + type: boolean + default: false + description: >- + (Optional) Whether to use DoRA (Weight-Decomposed Low-Rank Adaptation) + quantize_base: + type: boolean + default: false + description: >- + (Optional) Whether to quantize the base model weights + additionalProperties: false + required: + - type + - lora_attn_modules + - apply_lora_to_mlp + - apply_lora_to_output + - rank + - alpha + title: LoraFinetuningConfig + description: >- + Configuration for Low-Rank Adaptation (LoRA) fine-tuning. + QATFinetuningConfig: + type: object + properties: + type: + type: string + const: QAT + default: QAT + description: Algorithm type identifier, always "QAT" + quantizer_name: + type: string + description: >- + Name of the quantization algorithm to use + group_size: + type: integer + description: Size of groups for grouped quantization + additionalProperties: false + required: + - type + - quantizer_name + - group_size + title: QATFinetuningConfig + description: >- + Configuration for Quantization-Aware Training (QAT) fine-tuning. + SupervisedFineTuneRequest: + type: object + properties: + job_uuid: + type: string + description: The UUID of the job to create. + training_config: + $ref: '#/components/schemas/TrainingConfig' + description: The training configuration. + hyperparam_search_config: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: The hyperparam search configuration. + logger_config: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: The logger configuration. + model: + type: string + description: The model to fine-tune. + checkpoint_dir: + type: string + description: The directory to save checkpoint(s) to. + algorithm_config: + $ref: '#/components/schemas/AlgorithmConfig' + description: The algorithm configuration. + additionalProperties: false + required: + - job_uuid + - training_config + - hyperparam_search_config + - logger_config + title: SupervisedFineTuneRequest + responses: + BadRequest400: + description: The request was invalid or malformed + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + example: + status: 400 + title: Bad Request + detail: The request was invalid or malformed + TooManyRequests429: + description: >- + The client has sent too many requests in a given amount of time + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + example: + status: 429 + title: Too Many Requests + detail: >- + You have exceeded the rate limit. Please try again later. + InternalServerError500: + description: >- + The server encountered an unexpected error + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + example: + status: 500 + title: Internal Server Error + detail: >- + An unexpected error occurred. Our team has been notified. + DefaultError: + description: An unexpected error occurred + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + example: + status: 0 + title: Error + detail: An unexpected error occurred +security: + - Default: [] +tags: + - name: Agents + description: >- + APIs for creating and interacting with agentic systems. + x-displayName: Agents + - name: Benchmarks + description: '' + - name: Conversations + description: >- + Protocol for conversation management operations. + x-displayName: Conversations + - name: DatasetIO + description: '' + - name: Datasets + description: '' + - name: Eval + description: >- + Llama Stack Evaluation API for running evaluations on model and agent candidates. + x-displayName: Evaluations + - name: Files + description: >- + This API is used to upload documents that can be used with other Llama Stack + APIs. + x-displayName: Files + - name: Inference + description: >- + Llama Stack Inference API for generating completions, chat completions, and + embeddings. + + + This API provides the raw interface to the underlying models. Two kinds of models + are supported: + + - LLM models: these models generate "raw" and "chat" (conversational) completions. + + - Embedding models: these models generate embeddings to be used for semantic + search. + x-displayName: Inference + - name: Inspect + description: >- + APIs for inspecting the Llama Stack service, including health status, available + API routes with methods and implementing providers. + x-displayName: Inspect + - name: Models + description: '' + - name: PostTraining (Coming Soon) + description: '' + - name: Prompts + description: >- + Protocol for prompt management operations. + x-displayName: Prompts + - name: Providers + description: >- + Providers API for inspecting, listing, and modifying providers and their configurations. + x-displayName: Providers + - name: Safety + description: OpenAI-compatible Moderations API. + x-displayName: Safety + - name: Scoring + description: '' + - name: ScoringFunctions + description: '' + - name: Shields + description: '' + - name: SyntheticDataGeneration (Coming Soon) + description: '' + - name: ToolGroups + description: '' + - name: ToolRuntime + description: '' + - name: VectorIO + description: '' +x-tagGroups: + - name: Operations + tags: + - Agents + - Benchmarks + - Conversations + - DatasetIO + - Datasets + - Eval + - Files + - Inference + - Inspect + - Models + - PostTraining (Coming Soon) + - Prompts + - Providers + - Safety + - Scoring + - ScoringFunctions + - Shields + - SyntheticDataGeneration (Coming Soon) + - ToolGroups + - ToolRuntime + - VectorIO diff --git a/containers/Containerfile b/containers/Containerfile index c6e47fa1d..1ddf102af 100644 --- a/containers/Containerfile +++ b/containers/Containerfile @@ -60,6 +60,17 @@ ENV RUN_CONFIG_PATH=${RUN_CONFIG_PATH} # Copy the repository so editable installs and run configurations are available. COPY . /workspace +# Install the client package if it is provided +# NOTE: this is installed before llama-stack since llama-stack depends on llama-stack-client-python +RUN set -eux; \ + if [ -n "$LLAMA_STACK_CLIENT_DIR" ]; then \ + if [ ! -d "$LLAMA_STACK_CLIENT_DIR" ]; then \ + echo "LLAMA_STACK_CLIENT_DIR is set but $LLAMA_STACK_CLIENT_DIR does not exist" >&2; \ + exit 1; \ + fi; \ + uv pip install --no-cache-dir -e "$LLAMA_STACK_CLIENT_DIR"; \ + fi; + # Install llama-stack RUN set -eux; \ if [ "$INSTALL_MODE" = "editable" ]; then \ @@ -83,16 +94,6 @@ RUN set -eux; \ fi; \ fi; -# Install the client package if it is provided -RUN set -eux; \ - if [ -n "$LLAMA_STACK_CLIENT_DIR" ]; then \ - if [ ! -d "$LLAMA_STACK_CLIENT_DIR" ]; then \ - echo "LLAMA_STACK_CLIENT_DIR is set but $LLAMA_STACK_CLIENT_DIR does not exist" >&2; \ - exit 1; \ - fi; \ - uv pip install --no-cache-dir -e "$LLAMA_STACK_CLIENT_DIR"; \ - fi; - # Install the dependencies for the distribution RUN set -eux; \ if [ -z "$DISTRO_NAME" ]; then \ diff --git a/docs/docs/building_applications/rag.mdx b/docs/docs/building_applications/rag.mdx index 8307448be..b1681dc62 100644 --- a/docs/docs/building_applications/rag.mdx +++ b/docs/docs/building_applications/rag.mdx @@ -88,18 +88,19 @@ Llama Stack provides OpenAI-compatible RAG capabilities through: To enable automatic vector store creation without specifying embedding models, configure a default embedding model in your run.yaml like so: ```yaml -models: - - model_id: nomic-ai/nomic-embed-text-v1.5 - provider_id: inline::sentence-transformers - metadata: - embedding_dimension: 768 - default_configured: true +vector_stores: + default_provider_id: faiss + default_embedding_model: + provider_id: sentence-transformers + model_id: nomic-ai/nomic-embed-text-v1.5 ``` With this configuration: -- `client.vector_stores.create()` works without requiring embedding model parameters -- The system automatically uses the default model and its embedding dimension for any newly created vector store -- Only one model can be marked as `default_configured: true` +- `client.vector_stores.create()` works without requiring embedding model or provider parameters +- The system automatically uses the default vector store provider (`faiss`) when multiple providers are available +- The system automatically uses the default embedding model (`sentence-transformers/nomic-ai/nomic-embed-text-v1.5`) for any newly created vector store +- The `default_provider_id` specifies which vector storage backend to use +- The `default_embedding_model` specifies both the inference provider and model for embeddings ## Vector Store Operations @@ -108,14 +109,15 @@ With this configuration: You can create vector stores with automatic or explicit embedding model selection: ```python -# Automatic - uses default configured embedding model +# Automatic - uses default configured embedding model and vector store provider vs = client.vector_stores.create() -# Explicit - specify embedding model when you need a specific one +# Explicit - specify embedding model and/or provider when you need specific ones vs = client.vector_stores.create( extra_body={ - "embedding_model": "nomic-ai/nomic-embed-text-v1.5", - "embedding_dimension": 768 + "provider_id": "faiss", # Optional: specify vector store provider + "embedding_model": "sentence-transformers/nomic-ai/nomic-embed-text-v1.5", + "embedding_dimension": 768 # Optional: will be auto-detected if not provided } ) ``` diff --git a/docs/docs/distributions/configuration.mdx b/docs/docs/distributions/configuration.mdx index 81243c97b..bf3156865 100644 --- a/docs/docs/distributions/configuration.mdx +++ b/docs/docs/distributions/configuration.mdx @@ -44,18 +44,32 @@ providers: - provider_id: meta-reference provider_type: inline::meta-reference config: - persistence_store: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/agents_store.db + persistence: + agent_state: + backend: kv_default + namespace: agents + responses: + backend: sql_default + table_name: responses telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: {} -metadata_store: - namespace: null - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/registry.db +storage: + backends: + kv_default: + type: kv_sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/kvstore.db + sql_default: + type: sql_sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/sqlstore.db + references: + metadata: + backend: kv_default + namespace: registry + inference: + backend: sql_default + table_name: inference_store models: - metadata: {} model_id: ${env.INFERENCE_MODEL} diff --git a/docs/docs/distributions/k8s/stack-configmap.yaml b/docs/docs/distributions/k8s/stack-configmap.yaml index 3dbb0da97..c71ab05d8 100644 --- a/docs/docs/distributions/k8s/stack-configmap.yaml +++ b/docs/docs/distributions/k8s/stack-configmap.yaml @@ -1,56 +1,155 @@ apiVersion: v1 data: - stack_run_config.yaml: "version: '2'\nimage_name: kubernetes-demo\napis:\n- agents\n- - inference\n- files\n- safety\n- telemetry\n- tool_runtime\n- vector_io\nproviders:\n - \ inference:\n - provider_id: vllm-inference\n provider_type: remote::vllm\n - \ config:\n url: ${env.VLLM_URL:=http://localhost:8000/v1}\n max_tokens: - ${env.VLLM_MAX_TOKENS:=4096}\n api_token: ${env.VLLM_API_TOKEN:=fake}\n tls_verify: - ${env.VLLM_TLS_VERIFY:=true}\n - provider_id: vllm-safety\n provider_type: - remote::vllm\n config:\n url: ${env.VLLM_SAFETY_URL:=http://localhost:8000/v1}\n - \ max_tokens: ${env.VLLM_MAX_TOKENS:=4096}\n api_token: ${env.VLLM_API_TOKEN:=fake}\n - \ tls_verify: ${env.VLLM_TLS_VERIFY:=true}\n - provider_id: sentence-transformers\n - \ provider_type: inline::sentence-transformers\n config: {}\n vector_io:\n - \ - provider_id: ${env.ENABLE_CHROMADB:+chromadb}\n provider_type: remote::chromadb\n - \ config:\n url: ${env.CHROMADB_URL:=}\n kvstore:\n type: postgres\n - \ host: ${env.POSTGRES_HOST:=localhost}\n port: ${env.POSTGRES_PORT:=5432}\n - \ db: ${env.POSTGRES_DB:=llamastack}\n user: ${env.POSTGRES_USER:=llamastack}\n - \ password: ${env.POSTGRES_PASSWORD:=llamastack}\n files:\n - provider_id: - meta-reference-files\n provider_type: inline::localfs\n config:\n storage_dir: - ${env.FILES_STORAGE_DIR:=~/.llama/distributions/starter/files}\n metadata_store:\n - \ type: sqlite\n db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/files_metadata.db - \ \n safety:\n - provider_id: llama-guard\n provider_type: inline::llama-guard\n - \ config:\n excluded_categories: []\n agents:\n - provider_id: meta-reference\n - \ provider_type: inline::meta-reference\n config:\n persistence_store:\n - \ type: postgres\n host: ${env.POSTGRES_HOST:=localhost}\n port: - ${env.POSTGRES_PORT:=5432}\n db: ${env.POSTGRES_DB:=llamastack}\n user: - ${env.POSTGRES_USER:=llamastack}\n password: ${env.POSTGRES_PASSWORD:=llamastack}\n - \ responses_store:\n type: postgres\n host: ${env.POSTGRES_HOST:=localhost}\n - \ port: ${env.POSTGRES_PORT:=5432}\n db: ${env.POSTGRES_DB:=llamastack}\n - \ user: ${env.POSTGRES_USER:=llamastack}\n password: ${env.POSTGRES_PASSWORD:=llamastack}\n - \ telemetry:\n - provider_id: meta-reference\n provider_type: inline::meta-reference\n - \ config:\n service_name: \"${env.OTEL_SERVICE_NAME:=\\u200B}\"\n sinks: - ${env.TELEMETRY_SINKS:=console}\n tool_runtime:\n - provider_id: brave-search\n - \ provider_type: remote::brave-search\n config:\n api_key: ${env.BRAVE_SEARCH_API_KEY:+}\n - \ max_results: 3\n - provider_id: tavily-search\n provider_type: remote::tavily-search\n - \ config:\n api_key: ${env.TAVILY_SEARCH_API_KEY:+}\n max_results: - 3\n - provider_id: rag-runtime\n provider_type: inline::rag-runtime\n config: - {}\n - provider_id: model-context-protocol\n provider_type: remote::model-context-protocol\n - \ config: {}\nmetadata_store:\n type: postgres\n host: ${env.POSTGRES_HOST:=localhost}\n - \ port: ${env.POSTGRES_PORT:=5432}\n db: ${env.POSTGRES_DB:=llamastack}\n user: - ${env.POSTGRES_USER:=llamastack}\n password: ${env.POSTGRES_PASSWORD:=llamastack}\n - \ table_name: llamastack_kvstore\ninference_store:\n type: postgres\n host: - ${env.POSTGRES_HOST:=localhost}\n port: ${env.POSTGRES_PORT:=5432}\n db: ${env.POSTGRES_DB:=llamastack}\n - \ user: ${env.POSTGRES_USER:=llamastack}\n password: ${env.POSTGRES_PASSWORD:=llamastack}\nmodels:\n- - metadata:\n embedding_dimension: 384\n model_id: all-MiniLM-L6-v2\n provider_id: - sentence-transformers\n model_type: embedding\n- metadata: {}\n model_id: ${env.INFERENCE_MODEL}\n - \ provider_id: vllm-inference\n model_type: llm\n- metadata: {}\n model_id: - ${env.SAFETY_MODEL:=meta-llama/Llama-Guard-3-1B}\n provider_id: vllm-safety\n - \ model_type: llm\nshields:\n- shield_id: ${env.SAFETY_MODEL:=meta-llama/Llama-Guard-3-1B}\nvector_dbs: - []\ndatasets: []\nscoring_fns: []\nbenchmarks: []\ntool_groups:\n- toolgroup_id: - builtin::websearch\n provider_id: tavily-search\n- toolgroup_id: builtin::rag\n - \ provider_id: rag-runtime\nserver:\n port: 8321\n auth:\n provider_config:\n - \ type: github_token\n" + stack_run_config.yaml: | + version: '2' + image_name: kubernetes-demo + apis: + - agents + - inference + - files + - safety + - telemetry + - tool_runtime + - vector_io + providers: + inference: + - provider_id: vllm-inference + provider_type: remote::vllm + config: + url: ${env.VLLM_URL:=http://localhost:8000/v1} + max_tokens: ${env.VLLM_MAX_TOKENS:=4096} + api_token: ${env.VLLM_API_TOKEN:=fake} + tls_verify: ${env.VLLM_TLS_VERIFY:=true} + - provider_id: vllm-safety + provider_type: remote::vllm + config: + url: ${env.VLLM_SAFETY_URL:=http://localhost:8000/v1} + max_tokens: ${env.VLLM_MAX_TOKENS:=4096} + api_token: ${env.VLLM_API_TOKEN:=fake} + tls_verify: ${env.VLLM_TLS_VERIFY:=true} + - provider_id: sentence-transformers + provider_type: inline::sentence-transformers + config: {} + vector_io: + - provider_id: ${env.ENABLE_CHROMADB:+chromadb} + provider_type: remote::chromadb + config: + url: ${env.CHROMADB_URL:=} + kvstore: + type: postgres + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} + files: + - provider_id: meta-reference-files + provider_type: inline::localfs + config: + storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/starter/files} + metadata_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/files_metadata.db + safety: + - provider_id: llama-guard + provider_type: inline::llama-guard + config: + excluded_categories: [] + agents: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + persistence_store: + type: postgres + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} + responses_store: + type: postgres + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} + telemetry: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console} + tool_runtime: + - provider_id: brave-search + provider_type: remote::brave-search + config: + api_key: ${env.BRAVE_SEARCH_API_KEY:+} + max_results: 3 + - provider_id: tavily-search + provider_type: remote::tavily-search + config: + api_key: ${env.TAVILY_SEARCH_API_KEY:+} + max_results: 3 + - provider_id: rag-runtime + provider_type: inline::rag-runtime + config: {} + - provider_id: model-context-protocol + provider_type: remote::model-context-protocol + config: {} + storage: + backends: + kv_default: + type: kv_postgres + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} + table_name: ${env.POSTGRES_TABLE_NAME:=llamastack_kvstore} + sql_default: + type: sql_postgres + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} + references: + metadata: + backend: kv_default + namespace: registry + inference: + backend: sql_default + table_name: inference_store + models: + - metadata: + embedding_dimension: 768 + model_id: nomic-embed-text-v1.5 + provider_id: sentence-transformers + model_type: embedding + - metadata: {} + model_id: ${env.INFERENCE_MODEL} + provider_id: vllm-inference + model_type: llm + - metadata: {} + model_id: ${env.SAFETY_MODEL:=meta-llama/Llama-Guard-3-1B} + provider_id: vllm-safety + model_type: llm + shields: + - shield_id: ${env.SAFETY_MODEL:=meta-llama/Llama-Guard-3-1B} + vector_dbs: [] + datasets: [] + scoring_fns: [] + benchmarks: [] + tool_groups: + - toolgroup_id: builtin::websearch + provider_id: tavily-search + - toolgroup_id: builtin::rag + provider_id: rag-runtime + server: + port: 8321 + auth: + provider_config: + type: github_token kind: ConfigMap metadata: - creationTimestamp: null name: llama-stack-config diff --git a/docs/docs/distributions/k8s/stack_run_config.yaml b/docs/docs/distributions/k8s/stack_run_config.yaml index ee28a1ea8..863565fdf 100644 --- a/docs/docs/distributions/k8s/stack_run_config.yaml +++ b/docs/docs/distributions/k8s/stack_run_config.yaml @@ -93,21 +93,30 @@ providers: - provider_id: model-context-protocol provider_type: remote::model-context-protocol config: {} -metadata_store: - type: postgres - host: ${env.POSTGRES_HOST:=localhost} - port: ${env.POSTGRES_PORT:=5432} - db: ${env.POSTGRES_DB:=llamastack} - user: ${env.POSTGRES_USER:=llamastack} - password: ${env.POSTGRES_PASSWORD:=llamastack} - table_name: llamastack_kvstore -inference_store: - type: postgres - host: ${env.POSTGRES_HOST:=localhost} - port: ${env.POSTGRES_PORT:=5432} - db: ${env.POSTGRES_DB:=llamastack} - user: ${env.POSTGRES_USER:=llamastack} - password: ${env.POSTGRES_PASSWORD:=llamastack} +storage: + backends: + kv_default: + type: kv_postgres + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} + table_name: ${env.POSTGRES_TABLE_NAME:=llamastack_kvstore} + sql_default: + type: sql_postgres + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} + references: + metadata: + backend: kv_default + namespace: registry + inference: + backend: sql_default + table_name: inference_store models: - metadata: embedding_dimension: 768 diff --git a/docs/docs/providers/agents/inline_meta-reference.mdx b/docs/docs/providers/agents/inline_meta-reference.mdx index fd961745f..fac9b8406 100644 --- a/docs/docs/providers/agents/inline_meta-reference.mdx +++ b/docs/docs/providers/agents/inline_meta-reference.mdx @@ -14,16 +14,18 @@ Meta's reference implementation of an agent system that can use tools, access ve | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `persistence_store` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | | -| `responses_store` | `utils.sqlstore.sqlstore.SqliteSqlStoreConfig \| utils.sqlstore.sqlstore.PostgresSqlStoreConfig` | No | sqlite | | +| `persistence` | `` | No | | | ## Sample Configuration ```yaml -persistence_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/agents_store.db -responses_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/responses_store.db +persistence: + agent_state: + namespace: agents + backend: kv_default + responses: + table_name: responses + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 ``` diff --git a/docs/docs/providers/batches/inline_reference.mdx b/docs/docs/providers/batches/inline_reference.mdx index f43800555..45304fbb1 100644 --- a/docs/docs/providers/batches/inline_reference.mdx +++ b/docs/docs/providers/batches/inline_reference.mdx @@ -14,7 +14,7 @@ Reference implementation of batches API with KVStore persistence. | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | Configuration for the key-value store backend. | +| `kvstore` | `` | No | | Configuration for the key-value store backend. | | `max_concurrent_batches` | `` | No | 1 | Maximum number of concurrent batches to process simultaneously. | | `max_concurrent_requests_per_batch` | `` | No | 10 | Maximum number of concurrent requests to process per batch. | @@ -22,6 +22,6 @@ Reference implementation of batches API with KVStore persistence. ```yaml kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/batches.db + namespace: batches + backend: kv_default ``` diff --git a/docs/docs/providers/datasetio/inline_localfs.mdx b/docs/docs/providers/datasetio/inline_localfs.mdx index b02a3a3bd..a9363376c 100644 --- a/docs/docs/providers/datasetio/inline_localfs.mdx +++ b/docs/docs/providers/datasetio/inline_localfs.mdx @@ -14,12 +14,12 @@ Local filesystem-based dataset I/O provider for reading and writing datasets to | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | | +| `kvstore` | `` | No | | | ## Sample Configuration ```yaml kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/localfs_datasetio.db + namespace: datasetio::localfs + backend: kv_default ``` diff --git a/docs/docs/providers/datasetio/remote_huggingface.mdx b/docs/docs/providers/datasetio/remote_huggingface.mdx index 82597d999..de3ffaaa6 100644 --- a/docs/docs/providers/datasetio/remote_huggingface.mdx +++ b/docs/docs/providers/datasetio/remote_huggingface.mdx @@ -14,12 +14,12 @@ HuggingFace datasets provider for accessing and managing datasets from the Huggi | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | | +| `kvstore` | `` | No | | | ## Sample Configuration ```yaml kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/huggingface_datasetio.db + namespace: datasetio::huggingface + backend: kv_default ``` diff --git a/docs/docs/providers/eval/inline_meta-reference.mdx b/docs/docs/providers/eval/inline_meta-reference.mdx index b0eb589e0..2c86c18c9 100644 --- a/docs/docs/providers/eval/inline_meta-reference.mdx +++ b/docs/docs/providers/eval/inline_meta-reference.mdx @@ -14,12 +14,12 @@ Meta's reference implementation of evaluation tasks with support for multiple la | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | | +| `kvstore` | `` | No | | | ## Sample Configuration ```yaml kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/meta_reference_eval.db + namespace: eval + backend: kv_default ``` diff --git a/docs/docs/providers/files/inline_localfs.mdx b/docs/docs/providers/files/inline_localfs.mdx index 86d141f93..bff0c4eb9 100644 --- a/docs/docs/providers/files/inline_localfs.mdx +++ b/docs/docs/providers/files/inline_localfs.mdx @@ -15,7 +15,7 @@ Local filesystem-based file storage provider for managing files and documents lo | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| | `storage_dir` | `` | No | | Directory to store uploaded files | -| `metadata_store` | `utils.sqlstore.sqlstore.SqliteSqlStoreConfig \| utils.sqlstore.sqlstore.PostgresSqlStoreConfig` | No | sqlite | SQL store configuration for file metadata | +| `metadata_store` | `` | No | | SQL store configuration for file metadata | | `ttl_secs` | `` | No | 31536000 | | ## Sample Configuration @@ -23,6 +23,6 @@ Local filesystem-based file storage provider for managing files and documents lo ```yaml storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/dummy/files} metadata_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/files_metadata.db + table_name: files_metadata + backend: sql_default ``` diff --git a/docs/docs/providers/files/remote_s3.mdx b/docs/docs/providers/files/remote_s3.mdx index 353cedbfb..65cd545c5 100644 --- a/docs/docs/providers/files/remote_s3.mdx +++ b/docs/docs/providers/files/remote_s3.mdx @@ -20,7 +20,7 @@ AWS S3-based file storage provider for scalable cloud file management with metad | `aws_secret_access_key` | `str \| None` | No | | AWS secret access key (optional if using IAM roles) | | `endpoint_url` | `str \| None` | No | | Custom S3 endpoint URL (for MinIO, LocalStack, etc.) | | `auto_create_bucket` | `` | No | False | Automatically create the S3 bucket if it doesn't exist | -| `metadata_store` | `utils.sqlstore.sqlstore.SqliteSqlStoreConfig \| utils.sqlstore.sqlstore.PostgresSqlStoreConfig` | No | sqlite | SQL store configuration for file metadata | +| `metadata_store` | `` | No | | SQL store configuration for file metadata | ## Sample Configuration @@ -32,6 +32,6 @@ aws_secret_access_key: ${env.AWS_SECRET_ACCESS_KEY:=} endpoint_url: ${env.S3_ENDPOINT_URL:=} auto_create_bucket: ${env.S3_AUTO_CREATE_BUCKET:=false} metadata_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/s3_files_metadata.db + table_name: s3_files_metadata + backend: sql_default ``` diff --git a/docs/docs/providers/vector_io/inline_chromadb.mdx b/docs/docs/providers/vector_io/inline_chromadb.mdx index a1858eacc..0be5cd5b3 100644 --- a/docs/docs/providers/vector_io/inline_chromadb.mdx +++ b/docs/docs/providers/vector_io/inline_chromadb.mdx @@ -79,13 +79,13 @@ See [Chroma's documentation](https://docs.trychroma.com/docs/overview/introducti | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| | `db_path` | `` | No | | | -| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | Config for KV store backend | +| `persistence` | `` | No | | Config for KV store backend | ## Sample Configuration ```yaml db_path: ${env.CHROMADB_PATH} -kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/chroma_inline_registry.db +persistence: + namespace: vector_io::chroma + backend: kv_default ``` diff --git a/docs/docs/providers/vector_io/inline_faiss.mdx b/docs/docs/providers/vector_io/inline_faiss.mdx index 03bc2a928..3a1fba055 100644 --- a/docs/docs/providers/vector_io/inline_faiss.mdx +++ b/docs/docs/providers/vector_io/inline_faiss.mdx @@ -95,12 +95,12 @@ more details about Faiss in general. | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | | +| `persistence` | `` | No | | | ## Sample Configuration ```yaml -kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/faiss_store.db +persistence: + namespace: vector_io::faiss + backend: kv_default ``` diff --git a/docs/docs/providers/vector_io/inline_meta-reference.mdx b/docs/docs/providers/vector_io/inline_meta-reference.mdx index bcad86750..17fd40cf5 100644 --- a/docs/docs/providers/vector_io/inline_meta-reference.mdx +++ b/docs/docs/providers/vector_io/inline_meta-reference.mdx @@ -14,14 +14,14 @@ Meta's reference implementation of a vector database. | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | | +| `persistence` | `` | No | | | ## Sample Configuration ```yaml -kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/faiss_store.db +persistence: + namespace: vector_io::faiss + backend: kv_default ``` ## Deprecation Notice diff --git a/docs/docs/providers/vector_io/inline_milvus.mdx b/docs/docs/providers/vector_io/inline_milvus.mdx index 7e6f15c81..6063edab1 100644 --- a/docs/docs/providers/vector_io/inline_milvus.mdx +++ b/docs/docs/providers/vector_io/inline_milvus.mdx @@ -17,14 +17,14 @@ Please refer to the remote provider documentation. | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| | `db_path` | `` | No | | | -| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | Config for KV store backend (SQLite only for now) | +| `persistence` | `` | No | | Config for KV store backend (SQLite only for now) | | `consistency_level` | `` | No | Strong | The consistency level of the Milvus server | ## Sample Configuration ```yaml db_path: ${env.MILVUS_DB_PATH:=~/.llama/dummy}/milvus.db -kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/milvus_registry.db +persistence: + namespace: vector_io::milvus + backend: kv_default ``` diff --git a/docs/docs/providers/vector_io/inline_qdrant.mdx b/docs/docs/providers/vector_io/inline_qdrant.mdx index 5c9ab10f2..057d96761 100644 --- a/docs/docs/providers/vector_io/inline_qdrant.mdx +++ b/docs/docs/providers/vector_io/inline_qdrant.mdx @@ -98,13 +98,13 @@ See the [Qdrant documentation](https://qdrant.tech/documentation/) for more deta | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| | `path` | `` | No | | | -| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | | +| `persistence` | `` | No | | | ## Sample Configuration ```yaml path: ${env.QDRANT_PATH:=~/.llama/~/.llama/dummy}/qdrant.db -kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/qdrant_registry.db +persistence: + namespace: vector_io::qdrant + backend: kv_default ``` diff --git a/docs/docs/providers/vector_io/inline_sqlite-vec.mdx b/docs/docs/providers/vector_io/inline_sqlite-vec.mdx index aa6992a56..98a372250 100644 --- a/docs/docs/providers/vector_io/inline_sqlite-vec.mdx +++ b/docs/docs/providers/vector_io/inline_sqlite-vec.mdx @@ -408,13 +408,13 @@ See [sqlite-vec's GitHub repo](https://github.com/asg017/sqlite-vec/tree/main) f | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| | `db_path` | `` | No | | Path to the SQLite database file | -| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | Config for KV store backend (SQLite only for now) | +| `persistence` | `` | No | | Config for KV store backend (SQLite only for now) | ## Sample Configuration ```yaml db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/sqlite_vec.db -kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/sqlite_vec_registry.db +persistence: + namespace: vector_io::sqlite_vec + backend: kv_default ``` diff --git a/docs/docs/providers/vector_io/inline_sqlite_vec.mdx b/docs/docs/providers/vector_io/inline_sqlite_vec.mdx index 7f69f617d..67cbd0021 100644 --- a/docs/docs/providers/vector_io/inline_sqlite_vec.mdx +++ b/docs/docs/providers/vector_io/inline_sqlite_vec.mdx @@ -17,15 +17,15 @@ Please refer to the sqlite-vec provider documentation. | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| | `db_path` | `` | No | | Path to the SQLite database file | -| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | Config for KV store backend (SQLite only for now) | +| `persistence` | `` | No | | Config for KV store backend (SQLite only for now) | ## Sample Configuration ```yaml db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/sqlite_vec.db -kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/sqlite_vec_registry.db +persistence: + namespace: vector_io::sqlite_vec + backend: kv_default ``` ## Deprecation Notice diff --git a/docs/docs/providers/vector_io/remote_chromadb.mdx b/docs/docs/providers/vector_io/remote_chromadb.mdx index 807771003..2aee3eeca 100644 --- a/docs/docs/providers/vector_io/remote_chromadb.mdx +++ b/docs/docs/providers/vector_io/remote_chromadb.mdx @@ -78,13 +78,13 @@ See [Chroma's documentation](https://docs.trychroma.com/docs/overview/introducti | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| | `url` | `str \| None` | No | | | -| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | Config for KV store backend | +| `persistence` | `` | No | | Config for KV store backend | ## Sample Configuration ```yaml url: ${env.CHROMADB_URL} -kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/chroma_remote_registry.db +persistence: + namespace: vector_io::chroma_remote + backend: kv_default ``` diff --git a/docs/docs/providers/vector_io/remote_milvus.mdx b/docs/docs/providers/vector_io/remote_milvus.mdx index 7f7c08122..bf9935d61 100644 --- a/docs/docs/providers/vector_io/remote_milvus.mdx +++ b/docs/docs/providers/vector_io/remote_milvus.mdx @@ -408,7 +408,7 @@ For more details on TLS configuration, refer to the [TLS setup guide](https://mi | `uri` | `` | No | | The URI of the Milvus server | | `token` | `str \| None` | No | | The token of the Milvus server | | `consistency_level` | `` | No | Strong | The consistency level of the Milvus server | -| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | Config for KV store backend | +| `persistence` | `` | No | | Config for KV store backend | | `config` | `dict` | No | `{}` | This configuration allows additional fields to be passed through to the underlying Milvus client. See the [Milvus](https://milvus.io/docs/install-overview.md) documentation for more details about Milvus in general. | :::note @@ -420,7 +420,7 @@ This configuration class accepts additional fields beyond those listed above. Yo ```yaml uri: ${env.MILVUS_ENDPOINT} token: ${env.MILVUS_TOKEN} -kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/milvus_remote_registry.db +persistence: + namespace: vector_io::milvus_remote + backend: kv_default ``` diff --git a/docs/docs/providers/vector_io/remote_pgvector.mdx b/docs/docs/providers/vector_io/remote_pgvector.mdx index d21810c68..cb70f35d1 100644 --- a/docs/docs/providers/vector_io/remote_pgvector.mdx +++ b/docs/docs/providers/vector_io/remote_pgvector.mdx @@ -218,7 +218,7 @@ See [PGVector's documentation](https://github.com/pgvector/pgvector) for more de | `db` | `str \| None` | No | postgres | | | `user` | `str \| None` | No | postgres | | | `password` | `str \| None` | No | mysecretpassword | | -| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig, annotation=NoneType, required=False, default='sqlite', discriminator='type'` | No | | Config for KV store backend (SQLite only for now) | +| `persistence` | `llama_stack.core.storage.datatypes.KVStoreReference \| None` | No | | Config for KV store backend (SQLite only for now) | ## Sample Configuration @@ -228,7 +228,7 @@ port: ${env.PGVECTOR_PORT:=5432} db: ${env.PGVECTOR_DB} user: ${env.PGVECTOR_USER} password: ${env.PGVECTOR_PASSWORD} -kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/pgvector_registry.db +persistence: + namespace: vector_io::pgvector + backend: kv_default ``` diff --git a/docs/docs/providers/vector_io/remote_qdrant.mdx b/docs/docs/providers/vector_io/remote_qdrant.mdx index c44a2b937..dff9642b5 100644 --- a/docs/docs/providers/vector_io/remote_qdrant.mdx +++ b/docs/docs/providers/vector_io/remote_qdrant.mdx @@ -26,13 +26,13 @@ Please refer to the inline provider documentation. | `prefix` | `str \| None` | No | | | | `timeout` | `int \| None` | No | | | | `host` | `str \| None` | No | | | -| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | | +| `persistence` | `` | No | | | ## Sample Configuration ```yaml api_key: ${env.QDRANT_API_KEY:=} -kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/qdrant_registry.db +persistence: + namespace: vector_io::qdrant_remote + backend: kv_default ``` diff --git a/docs/docs/providers/vector_io/remote_weaviate.mdx b/docs/docs/providers/vector_io/remote_weaviate.mdx index 3f1e36422..b809bed2e 100644 --- a/docs/docs/providers/vector_io/remote_weaviate.mdx +++ b/docs/docs/providers/vector_io/remote_weaviate.mdx @@ -75,14 +75,14 @@ See [Weaviate's documentation](https://weaviate.io/developers/weaviate) for more |-------|------|----------|---------|-------------| | `weaviate_api_key` | `str \| None` | No | | The API key for the Weaviate instance | | `weaviate_cluster_url` | `str \| None` | No | localhost:8080 | The URL of the Weaviate cluster | -| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig, annotation=NoneType, required=False, default='sqlite', discriminator='type'` | No | | Config for KV store backend (SQLite only for now) | +| `persistence` | `llama_stack.core.storage.datatypes.KVStoreReference \| None` | No | | Config for KV store backend (SQLite only for now) | ## Sample Configuration ```yaml weaviate_api_key: null weaviate_cluster_url: ${env.WEAVIATE_CLUSTER_URL:=localhost:8080} -kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/weaviate_registry.db +persistence: + namespace: vector_io::weaviate + backend: kv_default ``` diff --git a/docs/openapi_generator/run_openapi_generator.sh b/docs/openapi_generator/run_openapi_generator.sh index 45d00d6e7..6cffd42b0 100755 --- a/docs/openapi_generator/run_openapi_generator.sh +++ b/docs/openapi_generator/run_openapi_generator.sh @@ -30,3 +30,5 @@ fi stack_dir=$(dirname $(dirname $THIS_DIR)) PYTHONPATH=$PYTHONPATH:$stack_dir \ python -m docs.openapi_generator.generate $(dirname $THIS_DIR)/static + +cp $stack_dir/docs/static/stainless-llama-stack-spec.yaml $stack_dir/client-sdks/stainless/openapi.yml diff --git a/docs/static/deprecated-llama-stack-spec.html b/docs/static/deprecated-llama-stack-spec.html index 60a8b9fbd..98ed50c4f 100644 --- a/docs/static/deprecated-llama-stack-spec.html +++ b/docs/static/deprecated-llama-stack-spec.html @@ -9024,6 +9024,10 @@ "$ref": "#/components/schemas/OpenAIResponseUsage", "description": "(Optional) Token usage information for the response" }, + "instructions": { + "type": "string", + "description": "(Optional) System message inserted into the model's context" + }, "input": { "type": "array", "items": { @@ -9901,6 +9905,10 @@ "usage": { "$ref": "#/components/schemas/OpenAIResponseUsage", "description": "(Optional) Token usage information for the response" + }, + "instructions": { + "type": "string", + "description": "(Optional) System message inserted into the model's context" } }, "additionalProperties": false, diff --git a/docs/static/deprecated-llama-stack-spec.yaml b/docs/static/deprecated-llama-stack-spec.yaml index aaa6cd413..99c8dd03e 100644 --- a/docs/static/deprecated-llama-stack-spec.yaml +++ b/docs/static/deprecated-llama-stack-spec.yaml @@ -6734,6 +6734,10 @@ components: $ref: '#/components/schemas/OpenAIResponseUsage' description: >- (Optional) Token usage information for the response + instructions: + type: string + description: >- + (Optional) System message inserted into the model's context input: type: array items: @@ -7403,6 +7407,10 @@ components: $ref: '#/components/schemas/OpenAIResponseUsage' description: >- (Optional) Token usage information for the response + instructions: + type: string + description: >- + (Optional) System message inserted into the model's context additionalProperties: false required: - created_at diff --git a/docs/static/llama-stack-spec.html b/docs/static/llama-stack-spec.html index 413e4f23e..1091a1cb6 100644 --- a/docs/static/llama-stack-spec.html +++ b/docs/static/llama-stack-spec.html @@ -7600,6 +7600,10 @@ "$ref": "#/components/schemas/OpenAIResponseUsage", "description": "(Optional) Token usage information for the response" }, + "instructions": { + "type": "string", + "description": "(Optional) System message inserted into the model's context" + }, "input": { "type": "array", "items": { @@ -8148,6 +8152,10 @@ "usage": { "$ref": "#/components/schemas/OpenAIResponseUsage", "description": "(Optional) Token usage information for the response" + }, + "instructions": { + "type": "string", + "description": "(Optional) System message inserted into the model's context" } }, "additionalProperties": false, diff --git a/docs/static/llama-stack-spec.yaml b/docs/static/llama-stack-spec.yaml index 93e51de6a..6c3702374 100644 --- a/docs/static/llama-stack-spec.yaml +++ b/docs/static/llama-stack-spec.yaml @@ -5815,6 +5815,10 @@ components: $ref: '#/components/schemas/OpenAIResponseUsage' description: >- (Optional) Token usage information for the response + instructions: + type: string + description: >- + (Optional) System message inserted into the model's context input: type: array items: @@ -6218,6 +6222,10 @@ components: $ref: '#/components/schemas/OpenAIResponseUsage' description: >- (Optional) Token usage information for the response + instructions: + type: string + description: >- + (Optional) System message inserted into the model's context additionalProperties: false required: - created_at diff --git a/docs/static/stainless-llama-stack-spec.html b/docs/static/stainless-llama-stack-spec.html index 858f20725..ee0a265d3 100644 --- a/docs/static/stainless-llama-stack-spec.html +++ b/docs/static/stainless-llama-stack-spec.html @@ -9272,6 +9272,10 @@ "$ref": "#/components/schemas/OpenAIResponseUsage", "description": "(Optional) Token usage information for the response" }, + "instructions": { + "type": "string", + "description": "(Optional) System message inserted into the model's context" + }, "input": { "type": "array", "items": { @@ -9820,6 +9824,10 @@ "usage": { "$ref": "#/components/schemas/OpenAIResponseUsage", "description": "(Optional) Token usage information for the response" + }, + "instructions": { + "type": "string", + "description": "(Optional) System message inserted into the model's context" } }, "additionalProperties": false, diff --git a/docs/static/stainless-llama-stack-spec.yaml b/docs/static/stainless-llama-stack-spec.yaml index 886549dbc..eff01931f 100644 --- a/docs/static/stainless-llama-stack-spec.yaml +++ b/docs/static/stainless-llama-stack-spec.yaml @@ -7028,6 +7028,10 @@ components: $ref: '#/components/schemas/OpenAIResponseUsage' description: >- (Optional) Token usage information for the response + instructions: + type: string + description: >- + (Optional) System message inserted into the model's context input: type: array items: @@ -7431,6 +7435,10 @@ components: $ref: '#/components/schemas/OpenAIResponseUsage' description: >- (Optional) Token usage information for the response + instructions: + type: string + description: >- + (Optional) System message inserted into the model's context additionalProperties: false required: - created_at diff --git a/llama_stack/apis/agents/openai_responses.py b/llama_stack/apis/agents/openai_responses.py index 25dc89a6b..821d6a8af 100644 --- a/llama_stack/apis/agents/openai_responses.py +++ b/llama_stack/apis/agents/openai_responses.py @@ -545,6 +545,7 @@ class OpenAIResponseObject(BaseModel): :param tools: (Optional) An array of tools the model may call while generating a response. :param truncation: (Optional) Truncation strategy applied to the response :param usage: (Optional) Token usage information for the response + :param instructions: (Optional) System message inserted into the model's context """ created_at: int @@ -564,6 +565,7 @@ class OpenAIResponseObject(BaseModel): tools: list[OpenAIResponseTool] | None = None truncation: str | None = None usage: OpenAIResponseUsage | None = None + instructions: str | None = None @json_schema_type diff --git a/llama_stack/apis/datatypes.py b/llama_stack/apis/datatypes.py index 8fbf21f3e..5777f3d04 100644 --- a/llama_stack/apis/datatypes.py +++ b/llama_stack/apis/datatypes.py @@ -121,6 +121,7 @@ class Api(Enum, metaclass=DynamicApiMeta): models = "models" shields = "shields" + vector_dbs = "vector_dbs" # only used for routing datasets = "datasets" scoring_functions = "scoring_functions" benchmarks = "benchmarks" diff --git a/llama_stack/apis/vector_dbs/vector_dbs.py b/llama_stack/apis/vector_dbs/vector_dbs.py index 53bf181e9..0368095cb 100644 --- a/llama_stack/apis/vector_dbs/vector_dbs.py +++ b/llama_stack/apis/vector_dbs/vector_dbs.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from typing import Literal +from typing import Literal, Protocol, runtime_checkable from pydantic import BaseModel @@ -59,3 +59,35 @@ class ListVectorDBsResponse(BaseModel): """ data: list[VectorDB] + + +@runtime_checkable +class VectorDBs(Protocol): + """Internal protocol for vector_dbs routing - no public API endpoints.""" + + async def list_vector_dbs(self) -> ListVectorDBsResponse: + """Internal method to list vector databases.""" + ... + + async def get_vector_db( + self, + vector_db_id: str, + ) -> VectorDB: + """Internal method to get a vector database by ID.""" + ... + + async def register_vector_db( + self, + vector_db_id: str, + embedding_model: str, + embedding_dimension: int | None = 384, + provider_id: str | None = None, + vector_db_name: str | None = None, + provider_vector_db_id: str | None = None, + ) -> VectorDB: + """Internal method to register a vector database.""" + ... + + async def unregister_vector_db(self, vector_db_id: str) -> None: + """Internal method to unregister a vector database.""" + ... diff --git a/llama_stack/cli/stack/_build.py b/llama_stack/cli/stack/_build.py index 471d5cb66..2a30ff394 100644 --- a/llama_stack/cli/stack/_build.py +++ b/llama_stack/cli/stack/_build.py @@ -40,12 +40,20 @@ from llama_stack.core.distribution import get_provider_registry from llama_stack.core.external import load_external_apis from llama_stack.core.resolver import InvalidProviderError from llama_stack.core.stack import replace_env_vars +from llama_stack.core.storage.datatypes import ( + InferenceStoreReference, + KVStoreReference, + ServerStoresConfig, + SqliteKVStoreConfig, + SqliteSqlStoreConfig, + SqlStoreReference, + StorageConfig, +) from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR, EXTERNAL_PROVIDERS_DIR from llama_stack.core.utils.dynamic import instantiate_class_type from llama_stack.core.utils.exec import formulate_run_args, run_command from llama_stack.core.utils.image_types import LlamaStackImageType from llama_stack.providers.datatypes import Api -from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig DISTRIBS_PATH = Path(__file__).parent.parent.parent / "distributions" @@ -286,21 +294,42 @@ def _generate_run_config( Generate a run.yaml template file for user to edit from a build.yaml file """ apis = list(build_config.distribution_spec.providers.keys()) + distro_dir = DISTRIBS_BASE_DIR / image_name + storage = StorageConfig( + backends={ + "kv_default": SqliteKVStoreConfig( + db_path=f"${{env.SQLITE_STORE_DIR:={distro_dir}}}/kvstore.db", + ), + "sql_default": SqliteSqlStoreConfig( + db_path=f"${{env.SQLITE_STORE_DIR:={distro_dir}}}/sql_store.db", + ), + }, + stores=ServerStoresConfig( + metadata=KVStoreReference( + backend="kv_default", + namespace="registry", + ), + inference=InferenceStoreReference( + backend="sql_default", + table_name="inference_store", + ), + conversations=SqlStoreReference( + backend="sql_default", + table_name="openai_conversations", + ), + ), + ) + run_config = StackRunConfig( container_image=(image_name if build_config.image_type == LlamaStackImageType.CONTAINER.value else None), image_name=image_name, apis=apis, providers={}, + storage=storage, external_providers_dir=build_config.external_providers_dir if build_config.external_providers_dir else EXTERNAL_PROVIDERS_DIR, ) - if not run_config.inference_store: - run_config.inference_store = SqliteSqlStoreConfig( - **SqliteSqlStoreConfig.sample_run_config( - __distro_dir__=(DISTRIBS_BASE_DIR / image_name).as_posix(), db_name="inference_store.db" - ) - ) # build providers dict provider_registry = get_provider_registry(build_config) for api in apis: diff --git a/llama_stack/cli/stack/utils.py b/llama_stack/cli/stack/utils.py index 4d4c1b538..cc1ca051b 100644 --- a/llama_stack/cli/stack/utils.py +++ b/llama_stack/cli/stack/utils.py @@ -17,10 +17,19 @@ from llama_stack.core.datatypes import ( BuildConfig, Provider, StackRunConfig, + StorageConfig, ) from llama_stack.core.distribution import get_provider_registry from llama_stack.core.resolver import InvalidProviderError -from llama_stack.core.utils.config_dirs import EXTERNAL_PROVIDERS_DIR +from llama_stack.core.storage.datatypes import ( + InferenceStoreReference, + KVStoreReference, + ServerStoresConfig, + SqliteKVStoreConfig, + SqliteSqlStoreConfig, + SqlStoreReference, +) +from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR, EXTERNAL_PROVIDERS_DIR from llama_stack.core.utils.dynamic import instantiate_class_type from llama_stack.core.utils.image_types import LlamaStackImageType from llama_stack.providers.datatypes import Api @@ -51,11 +60,23 @@ def generate_run_config( Generate a run.yaml template file for user to edit from a build.yaml file """ apis = list(build_config.distribution_spec.providers.keys()) + distro_dir = DISTRIBS_BASE_DIR / image_name run_config = StackRunConfig( container_image=(image_name if build_config.image_type == LlamaStackImageType.CONTAINER.value else None), image_name=image_name, apis=apis, providers={}, + storage=StorageConfig( + backends={ + "kv_default": SqliteKVStoreConfig(db_path=str(distro_dir / "kvstore.db")), + "sql_default": SqliteSqlStoreConfig(db_path=str(distro_dir / "sql_store.db")), + }, + stores=ServerStoresConfig( + metadata=KVStoreReference(backend="kv_default", namespace="registry"), + inference=InferenceStoreReference(backend="sql_default", table_name="inference_store"), + conversations=SqlStoreReference(backend="sql_default", table_name="openai_conversations"), + ), + ), external_providers_dir=build_config.external_providers_dir if build_config.external_providers_dir else EXTERNAL_PROVIDERS_DIR, diff --git a/llama_stack/core/configure.py b/llama_stack/core/configure.py index bfa2c6d71..734839ea9 100644 --- a/llama_stack/core/configure.py +++ b/llama_stack/core/configure.py @@ -159,6 +159,37 @@ def upgrade_from_routing_table( config_dict["apis"] = config_dict["apis_to_serve"] config_dict.pop("apis_to_serve", None) + # Add default storage config if not present + if "storage" not in config_dict: + config_dict["storage"] = { + "backends": { + "kv_default": { + "type": "kv_sqlite", + "db_path": "~/.llama/kvstore.db", + }, + "sql_default": { + "type": "sql_sqlite", + "db_path": "~/.llama/sql_store.db", + }, + }, + "stores": { + "metadata": { + "namespace": "registry", + "backend": "kv_default", + }, + "inference": { + "table_name": "inference_store", + "backend": "sql_default", + "max_write_queue_size": 10000, + "num_writers": 4, + }, + "conversations": { + "table_name": "openai_conversations", + "backend": "sql_default", + }, + }, + } + return config_dict diff --git a/llama_stack/core/conversations/conversations.py b/llama_stack/core/conversations/conversations.py index d2537c7ee..66880ca36 100644 --- a/llama_stack/core/conversations/conversations.py +++ b/llama_stack/core/conversations/conversations.py @@ -4,7 +4,6 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -import os import secrets import time from typing import Any @@ -21,16 +20,11 @@ from llama_stack.apis.conversations.conversations import ( Conversations, Metadata, ) -from llama_stack.core.datatypes import AccessRule -from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR +from llama_stack.core.datatypes import AccessRule, StackRunConfig from llama_stack.log import get_logger from llama_stack.providers.utils.sqlstore.api import ColumnDefinition, ColumnType from llama_stack.providers.utils.sqlstore.authorized_sqlstore import AuthorizedSqlStore -from llama_stack.providers.utils.sqlstore.sqlstore import ( - SqliteSqlStoreConfig, - SqlStoreConfig, - sqlstore_impl, -) +from llama_stack.providers.utils.sqlstore.sqlstore import sqlstore_impl logger = get_logger(name=__name__, category="openai_conversations") @@ -38,13 +32,11 @@ logger = get_logger(name=__name__, category="openai_conversations") class ConversationServiceConfig(BaseModel): """Configuration for the built-in conversation service. - :param conversations_store: SQL store configuration for conversations (defaults to SQLite) + :param run_config: Stack run configuration for resolving persistence :param policy: Access control rules """ - conversations_store: SqlStoreConfig = SqliteSqlStoreConfig( - db_path=(DISTRIBS_BASE_DIR / "conversations.db").as_posix() - ) + run_config: StackRunConfig policy: list[AccessRule] = [] @@ -63,14 +55,16 @@ class ConversationServiceImpl(Conversations): self.deps = deps self.policy = config.policy - base_sql_store = sqlstore_impl(config.conversations_store) + # Use conversations store reference from run config + conversations_ref = config.run_config.storage.stores.conversations + if not conversations_ref: + raise ValueError("storage.stores.conversations must be configured in run config") + + base_sql_store = sqlstore_impl(conversations_ref) self.sql_store = AuthorizedSqlStore(base_sql_store, self.policy) async def initialize(self) -> None: """Initialize the store and create tables.""" - if isinstance(self.config.conversations_store, SqliteSqlStoreConfig): - os.makedirs(os.path.dirname(self.config.conversations_store.db_path), exist_ok=True) - await self.sql_store.create_table( "openai_conversations", { diff --git a/llama_stack/core/datatypes.py b/llama_stack/core/datatypes.py index 94222d49e..6d06adb84 100644 --- a/llama_stack/core/datatypes.py +++ b/llama_stack/core/datatypes.py @@ -26,9 +26,12 @@ from llama_stack.apis.tools import ToolGroup, ToolGroupInput, ToolRuntime from llama_stack.apis.vector_dbs import VectorDB, VectorDBInput from llama_stack.apis.vector_io import VectorIO from llama_stack.core.access_control.datatypes import AccessRule +from llama_stack.core.storage.datatypes import ( + KVStoreReference, + StorageBackendType, + StorageConfig, +) from llama_stack.providers.datatypes import Api, ProviderSpec -from llama_stack.providers.utils.kvstore.config import KVStoreConfig, SqliteKVStoreConfig -from llama_stack.providers.utils.sqlstore.sqlstore import SqlStoreConfig LLAMA_STACK_BUILD_CONFIG_VERSION = 2 LLAMA_STACK_RUN_CONFIG_VERSION = 2 @@ -351,12 +354,32 @@ class AuthenticationRequiredError(Exception): pass +class QualifiedModel(BaseModel): + """A qualified model identifier, consisting of a provider ID and a model ID.""" + + provider_id: str + model_id: str + + +class VectorStoresConfig(BaseModel): + """Configuration for vector stores in the stack.""" + + default_provider_id: str | None = Field( + default=None, + description="ID of the vector_io provider to use as default when multiple providers are available and none is specified.", + ) + default_embedding_model: QualifiedModel | None = Field( + default=None, + description="Default embedding model configuration for vector stores.", + ) + + class QuotaPeriod(StrEnum): DAY = "day" class QuotaConfig(BaseModel): - kvstore: SqliteKVStoreConfig = Field(description="Config for KV store backend (SQLite only for now)") + kvstore: KVStoreReference = Field(description="Config for KV store backend (SQLite only for now)") anonymous_max_requests: int = Field(default=100, description="Max requests for unauthenticated clients per period") authenticated_max_requests: int = Field( default=1000, description="Max requests for authenticated clients per period" @@ -438,18 +461,6 @@ class ServerConfig(BaseModel): ) -class InferenceStoreConfig(BaseModel): - sql_store_config: SqlStoreConfig - max_write_queue_size: int = Field(default=10000, description="Max queued writes for inference store") - num_writers: int = Field(default=4, description="Number of concurrent background writers") - - -class ResponsesStoreConfig(BaseModel): - sql_store_config: SqlStoreConfig - max_write_queue_size: int = Field(default=10000, description="Max queued writes for responses store") - num_writers: int = Field(default=4, description="Number of concurrent background writers") - - class StackRunConfig(BaseModel): version: int = LLAMA_STACK_RUN_CONFIG_VERSION @@ -476,26 +487,8 @@ One or more providers to use for each API. The same provider_type (e.g., meta-re can be instantiated multiple times (with different configs) if necessary. """, ) - metadata_store: KVStoreConfig | None = Field( - default=None, - description=""" -Configuration for the persistence store used by the distribution registry. If not specified, -a default SQLite store will be used.""", - ) - - inference_store: InferenceStoreConfig | SqlStoreConfig | None = Field( - default=None, - description=""" -Configuration for the persistence store used by the inference API. Can be either a -InferenceStoreConfig (with queue tuning parameters) or a SqlStoreConfig (deprecated). -If not specified, a default SQLite store will be used.""", - ) - - conversations_store: SqlStoreConfig | None = Field( - default=None, - description=""" -Configuration for the persistence store used by the conversations API. -If not specified, a default SQLite store will be used.""", + storage: StorageConfig = Field( + description="Catalog of named storage backends and references available to the stack", ) # registry of "resources" in the distribution @@ -526,6 +519,11 @@ If not specified, a default SQLite store will be used.""", description="Path to directory containing external API implementations. The APIs code and dependencies must be installed on the system.", ) + vector_stores: VectorStoresConfig | None = Field( + default=None, + description="Configuration for vector stores, including default embedding model", + ) + @field_validator("external_providers_dir") @classmethod def validate_external_providers_dir(cls, v): @@ -535,6 +533,49 @@ If not specified, a default SQLite store will be used.""", return Path(v) return v + @model_validator(mode="after") + def validate_server_stores(self) -> "StackRunConfig": + backend_map = self.storage.backends + stores = self.storage.stores + kv_backends = { + name + for name, cfg in backend_map.items() + if cfg.type + in { + StorageBackendType.KV_REDIS, + StorageBackendType.KV_SQLITE, + StorageBackendType.KV_POSTGRES, + StorageBackendType.KV_MONGODB, + } + } + sql_backends = { + name + for name, cfg in backend_map.items() + if cfg.type in {StorageBackendType.SQL_SQLITE, StorageBackendType.SQL_POSTGRES} + } + + def _ensure_backend(reference, expected_set, store_name: str) -> None: + if reference is None: + return + backend_name = reference.backend + if backend_name not in backend_map: + raise ValueError( + f"{store_name} references unknown backend '{backend_name}'. " + f"Available backends: {sorted(backend_map)}" + ) + if backend_name not in expected_set: + raise ValueError( + f"{store_name} references backend '{backend_name}' of type " + f"'{backend_map[backend_name].type.value}', but a backend of type " + f"{'kv_*' if expected_set is kv_backends else 'sql_*'} is required." + ) + + _ensure_backend(stores.metadata, kv_backends, "storage.stores.metadata") + _ensure_backend(stores.inference, sql_backends, "storage.stores.inference") + _ensure_backend(stores.conversations, sql_backends, "storage.stores.conversations") + _ensure_backend(stores.responses, sql_backends, "storage.stores.responses") + return self + class BuildConfig(BaseModel): version: int = LLAMA_STACK_BUILD_CONFIG_VERSION diff --git a/llama_stack/core/distribution.py b/llama_stack/core/distribution.py index 0e1f672c3..59461f5d6 100644 --- a/llama_stack/core/distribution.py +++ b/llama_stack/core/distribution.py @@ -63,6 +63,10 @@ def builtin_automatically_routed_apis() -> list[AutoRoutedApiInfo]: routing_table_api=Api.tool_groups, router_api=Api.tool_runtime, ), + AutoRoutedApiInfo( + routing_table_api=Api.vector_dbs, + router_api=Api.vector_io, + ), ] diff --git a/llama_stack/core/prompts/prompts.py b/llama_stack/core/prompts/prompts.py index 26e8f5cef..856397ca5 100644 --- a/llama_stack/core/prompts/prompts.py +++ b/llama_stack/core/prompts/prompts.py @@ -11,9 +11,8 @@ from pydantic import BaseModel from llama_stack.apis.prompts import ListPromptsResponse, Prompt, Prompts from llama_stack.core.datatypes import StackRunConfig -from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR +from llama_stack.core.storage.datatypes import KVStoreReference from llama_stack.providers.utils.kvstore import KVStore, kvstore_impl -from llama_stack.providers.utils.kvstore.config import SqliteKVStoreConfig class PromptServiceConfig(BaseModel): @@ -41,10 +40,12 @@ class PromptServiceImpl(Prompts): self.kvstore: KVStore async def initialize(self) -> None: - kvstore_config = SqliteKVStoreConfig( - db_path=(DISTRIBS_BASE_DIR / self.config.run_config.image_name / "prompts.db").as_posix() - ) - self.kvstore = await kvstore_impl(kvstore_config) + # Use metadata store backend with prompts-specific namespace + metadata_ref = self.config.run_config.storage.stores.metadata + if not metadata_ref: + raise ValueError("storage.stores.metadata must be configured in run config") + prompts_ref = KVStoreReference(namespace="prompts", backend=metadata_ref.backend) + self.kvstore = await kvstore_impl(prompts_ref) def _get_default_key(self, prompt_id: str) -> str: """Get the KVStore key that stores the default version number.""" diff --git a/llama_stack/core/resolver.py b/llama_stack/core/resolver.py index acd459f99..6e1843870 100644 --- a/llama_stack/core/resolver.py +++ b/llama_stack/core/resolver.py @@ -29,6 +29,7 @@ from llama_stack.apis.scoring_functions import ScoringFunctions from llama_stack.apis.shields import Shields from llama_stack.apis.telemetry import Telemetry from llama_stack.apis.tools import ToolGroups, ToolRuntime +from llama_stack.apis.vector_dbs import VectorDBs from llama_stack.apis.vector_io import VectorIO from llama_stack.apis.version import LLAMA_STACK_API_V1ALPHA from llama_stack.core.client import get_client_impl @@ -81,6 +82,7 @@ def api_protocol_map(external_apis: dict[Api, ExternalApiSpec] | None = None) -> Api.inspect: Inspect, Api.batches: Batches, Api.vector_io: VectorIO, + Api.vector_dbs: VectorDBs, Api.models: Models, Api.safety: Safety, Api.shields: Shields, diff --git a/llama_stack/core/routers/__init__.py b/llama_stack/core/routers/__init__.py index 4463d2460..df4df0463 100644 --- a/llama_stack/core/routers/__init__.py +++ b/llama_stack/core/routers/__init__.py @@ -6,7 +6,10 @@ from typing import Any -from llama_stack.core.datatypes import AccessRule, RoutedProtocol +from llama_stack.core.datatypes import ( + AccessRule, + RoutedProtocol, +) from llama_stack.core.stack import StackRunConfig from llama_stack.core.store import DistributionRegistry from llama_stack.providers.datatypes import Api, RoutingTable @@ -26,6 +29,7 @@ async def get_routing_table_impl( from ..routing_tables.scoring_functions import ScoringFunctionsRoutingTable from ..routing_tables.shields import ShieldsRoutingTable from ..routing_tables.toolgroups import ToolGroupsRoutingTable + from ..routing_tables.vector_dbs import VectorDBsRoutingTable api_to_tables = { "models": ModelsRoutingTable, @@ -34,6 +38,7 @@ async def get_routing_table_impl( "scoring_functions": ScoringFunctionsRoutingTable, "benchmarks": BenchmarksRoutingTable, "tool_groups": ToolGroupsRoutingTable, + "vector_dbs": VectorDBsRoutingTable, } if api.value not in api_to_tables: @@ -76,14 +81,21 @@ async def get_auto_router_impl( api_to_dep_impl[dep_name] = deps[dep_api] # TODO: move pass configs to routers instead - if api == Api.inference and run_config.inference_store: + if api == Api.inference: + inference_ref = run_config.storage.stores.inference + if not inference_ref: + raise ValueError("storage.stores.inference must be configured in run config") + inference_store = InferenceStore( - config=run_config.inference_store, + reference=inference_ref, policy=policy, ) await inference_store.initialize() api_to_dep_impl["store"] = inference_store + elif api == Api.vector_io: + api_to_dep_impl["vector_stores_config"] = run_config.vector_stores + impl = api_to_routers[api.value](routing_table, **api_to_dep_impl) await impl.initialize() return impl diff --git a/llama_stack/core/routers/vector_io.py b/llama_stack/core/routers/vector_io.py index f4e871a40..bfc5f7164 100644 --- a/llama_stack/core/routers/vector_io.py +++ b/llama_stack/core/routers/vector_io.py @@ -31,6 +31,7 @@ from llama_stack.apis.vector_io import ( VectorStoreObject, VectorStoreSearchResponsePage, ) +from llama_stack.core.datatypes import VectorStoresConfig from llama_stack.log import get_logger from llama_stack.providers.datatypes import HealthResponse, HealthStatus, RoutingTable @@ -43,9 +44,11 @@ class VectorIORouter(VectorIO): def __init__( self, routing_table: RoutingTable, + vector_stores_config: VectorStoresConfig | None = None, ) -> None: logger.debug("Initializing VectorIORouter") self.routing_table = routing_table + self.vector_stores_config = vector_stores_config async def initialize(self) -> None: logger.debug("VectorIORouter.initialize") @@ -122,6 +125,17 @@ class VectorIORouter(VectorIO): embedding_dimension = extra.get("embedding_dimension") provider_id = extra.get("provider_id") + # Use default embedding model if not specified + if ( + embedding_model is None + and self.vector_stores_config + and self.vector_stores_config.default_embedding_model is not None + ): + # Construct the full model ID with provider prefix + embedding_provider_id = self.vector_stores_config.default_embedding_model.provider_id + model_id = self.vector_stores_config.default_embedding_model.model_id + embedding_model = f"{embedding_provider_id}/{model_id}" + if embedding_model is not None and embedding_dimension is None: embedding_dimension = await self._get_embedding_model_dimension(embedding_model) @@ -132,11 +146,24 @@ class VectorIORouter(VectorIO): raise ValueError("No vector_io providers available") if num_providers > 1: available_providers = list(self.routing_table.impls_by_provider_id.keys()) - raise ValueError( - f"Multiple vector_io providers available. Please specify provider_id in extra_body. " - f"Available providers: {available_providers}" - ) - provider_id = list(self.routing_table.impls_by_provider_id.keys())[0] + # Use default configured provider + if self.vector_stores_config and self.vector_stores_config.default_provider_id: + default_provider = self.vector_stores_config.default_provider_id + if default_provider in available_providers: + provider_id = default_provider + logger.debug(f"Using configured default vector store provider: {provider_id}") + else: + raise ValueError( + f"Configured default vector store provider '{default_provider}' not found. " + f"Available providers: {available_providers}" + ) + else: + raise ValueError( + f"Multiple vector_io providers available. Please specify provider_id in extra_body. " + f"Available providers: {available_providers}" + ) + else: + provider_id = list(self.routing_table.impls_by_provider_id.keys())[0] vector_db_id = f"vs_{uuid.uuid4()}" registered_vector_db = await self.routing_table.register_vector_db( @@ -243,8 +270,7 @@ class VectorIORouter(VectorIO): vector_store_id: str, ) -> VectorStoreDeleteResponse: logger.debug(f"VectorIORouter.openai_delete_vector_store: {vector_store_id}") - provider = await self.routing_table.get_provider_impl(vector_store_id) - return await provider.openai_delete_vector_store(vector_store_id) + return await self.routing_table.openai_delete_vector_store(vector_store_id) async def openai_search_vector_store( self, diff --git a/llama_stack/core/routing_tables/common.py b/llama_stack/core/routing_tables/common.py index 8df0a89a9..087483bb6 100644 --- a/llama_stack/core/routing_tables/common.py +++ b/llama_stack/core/routing_tables/common.py @@ -134,12 +134,15 @@ class CommonRoutingTableImpl(RoutingTable): from .scoring_functions import ScoringFunctionsRoutingTable from .shields import ShieldsRoutingTable from .toolgroups import ToolGroupsRoutingTable + from .vector_dbs import VectorDBsRoutingTable def apiname_object(): if isinstance(self, ModelsRoutingTable): return ("Inference", "model") elif isinstance(self, ShieldsRoutingTable): return ("Safety", "shield") + elif isinstance(self, VectorDBsRoutingTable): + return ("VectorIO", "vector_db") elif isinstance(self, DatasetsRoutingTable): return ("DatasetIO", "dataset") elif isinstance(self, ScoringFunctionsRoutingTable): diff --git a/llama_stack/core/routing_tables/vector_dbs.py b/llama_stack/core/routing_tables/vector_dbs.py new file mode 100644 index 000000000..e87fb61c6 --- /dev/null +++ b/llama_stack/core/routing_tables/vector_dbs.py @@ -0,0 +1,323 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from typing import Any + +from pydantic import TypeAdapter + +from llama_stack.apis.common.errors import ModelNotFoundError, ModelTypeError +from llama_stack.apis.models import ModelType +from llama_stack.apis.resource import ResourceType + +# Removed VectorDBs import to avoid exposing public API +from llama_stack.apis.vector_io.vector_io import ( + OpenAICreateVectorStoreRequestWithExtraBody, + SearchRankingOptions, + VectorStoreChunkingStrategy, + VectorStoreDeleteResponse, + VectorStoreFileContentsResponse, + VectorStoreFileDeleteResponse, + VectorStoreFileObject, + VectorStoreFileStatus, + VectorStoreObject, + VectorStoreSearchResponsePage, +) +from llama_stack.core.datatypes import ( + VectorDBWithOwner, +) +from llama_stack.log import get_logger + +from .common import CommonRoutingTableImpl, lookup_model + +logger = get_logger(name=__name__, category="core::routing_tables") + + +class VectorDBsRoutingTable(CommonRoutingTableImpl): + """Internal routing table for vector_db operations. + + Does not inherit from VectorDBs to avoid exposing public API endpoints. + Only provides internal routing functionality for VectorIORouter. + """ + + # Internal methods only - no public API exposure + + async def register_vector_db( + self, + vector_db_id: str, + embedding_model: str, + embedding_dimension: int | None = 384, + provider_id: str | None = None, + provider_vector_db_id: str | None = None, + vector_db_name: str | None = None, + ) -> Any: + if provider_id is None: + if len(self.impls_by_provider_id) > 0: + provider_id = list(self.impls_by_provider_id.keys())[0] + if len(self.impls_by_provider_id) > 1: + logger.warning( + f"No provider specified and multiple providers available. Arbitrarily selected the first provider {provider_id}." + ) + else: + raise ValueError("No provider available. Please configure a vector_io provider.") + model = await lookup_model(self, embedding_model) + if model is None: + raise ModelNotFoundError(embedding_model) + if model.model_type != ModelType.embedding: + raise ModelTypeError(embedding_model, model.model_type, ModelType.embedding) + if "embedding_dimension" not in model.metadata: + raise ValueError(f"Model {embedding_model} does not have an embedding dimension") + + try: + provider = self.impls_by_provider_id[provider_id] + except KeyError: + available_providers = list(self.impls_by_provider_id.keys()) + raise ValueError( + f"Provider '{provider_id}' not found in routing table. Available providers: {available_providers}" + ) from None + logger.warning( + "VectorDB is being deprecated in future releases in favor of VectorStore. Please migrate your usage accordingly." + ) + request = OpenAICreateVectorStoreRequestWithExtraBody( + name=vector_db_name or vector_db_id, + embedding_model=embedding_model, + embedding_dimension=model.metadata["embedding_dimension"], + provider_id=provider_id, + provider_vector_db_id=provider_vector_db_id, + ) + vector_store = await provider.openai_create_vector_store(request) + + vector_store_id = vector_store.id + actual_provider_vector_db_id = provider_vector_db_id or vector_store_id + logger.warning( + f"Ignoring vector_db_id {vector_db_id} and using vector_store_id {vector_store_id} instead. Setting VectorDB {vector_db_id} to VectorDB.vector_db_name" + ) + + vector_db_data = { + "identifier": vector_store_id, + "type": ResourceType.vector_db.value, + "provider_id": provider_id, + "provider_resource_id": actual_provider_vector_db_id, + "embedding_model": embedding_model, + "embedding_dimension": model.metadata["embedding_dimension"], + "vector_db_name": vector_store.name, + } + vector_db = TypeAdapter(VectorDBWithOwner).validate_python(vector_db_data) + await self.register_object(vector_db) + return vector_db + + async def openai_retrieve_vector_store( + self, + vector_store_id: str, + ) -> VectorStoreObject: + await self.assert_action_allowed("read", "vector_db", vector_store_id) + provider = await self.get_provider_impl(vector_store_id) + return await provider.openai_retrieve_vector_store(vector_store_id) + + async def openai_update_vector_store( + self, + vector_store_id: str, + name: str | None = None, + expires_after: dict[str, Any] | None = None, + metadata: dict[str, Any] | None = None, + ) -> VectorStoreObject: + await self.assert_action_allowed("update", "vector_db", vector_store_id) + provider = await self.get_provider_impl(vector_store_id) + return await provider.openai_update_vector_store( + vector_store_id=vector_store_id, + name=name, + expires_after=expires_after, + metadata=metadata, + ) + + async def openai_delete_vector_store( + self, + vector_store_id: str, + ) -> VectorStoreDeleteResponse: + await self.assert_action_allowed("delete", "vector_db", vector_store_id) + provider = await self.get_provider_impl(vector_store_id) + result = await provider.openai_delete_vector_store(vector_store_id) + await self.unregister_vector_db(vector_store_id) + return result + + async def unregister_vector_db(self, vector_store_id: str) -> None: + """Remove the vector store from the routing table registry.""" + try: + vector_db_obj = await self.get_object_by_identifier("vector_db", vector_store_id) + if vector_db_obj: + await self.unregister_object(vector_db_obj) + except Exception as e: + # Log the error but don't fail the operation + logger.warning(f"Failed to unregister vector store {vector_store_id} from routing table: {e}") + + async def openai_search_vector_store( + self, + vector_store_id: str, + query: str | list[str], + filters: dict[str, Any] | None = None, + max_num_results: int | None = 10, + ranking_options: SearchRankingOptions | None = None, + rewrite_query: bool | None = False, + search_mode: str | None = "vector", + ) -> VectorStoreSearchResponsePage: + await self.assert_action_allowed("read", "vector_db", vector_store_id) + provider = await self.get_provider_impl(vector_store_id) + return await provider.openai_search_vector_store( + vector_store_id=vector_store_id, + query=query, + filters=filters, + max_num_results=max_num_results, + ranking_options=ranking_options, + rewrite_query=rewrite_query, + search_mode=search_mode, + ) + + async def openai_attach_file_to_vector_store( + self, + vector_store_id: str, + file_id: str, + attributes: dict[str, Any] | None = None, + chunking_strategy: VectorStoreChunkingStrategy | None = None, + ) -> VectorStoreFileObject: + await self.assert_action_allowed("update", "vector_db", vector_store_id) + provider = await self.get_provider_impl(vector_store_id) + return await provider.openai_attach_file_to_vector_store( + vector_store_id=vector_store_id, + file_id=file_id, + attributes=attributes, + chunking_strategy=chunking_strategy, + ) + + async def openai_list_files_in_vector_store( + self, + vector_store_id: str, + limit: int | None = 20, + order: str | None = "desc", + after: str | None = None, + before: str | None = None, + filter: VectorStoreFileStatus | None = None, + ) -> list[VectorStoreFileObject]: + await self.assert_action_allowed("read", "vector_db", vector_store_id) + provider = await self.get_provider_impl(vector_store_id) + return await provider.openai_list_files_in_vector_store( + vector_store_id=vector_store_id, + limit=limit, + order=order, + after=after, + before=before, + filter=filter, + ) + + async def openai_retrieve_vector_store_file( + self, + vector_store_id: str, + file_id: str, + ) -> VectorStoreFileObject: + await self.assert_action_allowed("read", "vector_db", vector_store_id) + provider = await self.get_provider_impl(vector_store_id) + return await provider.openai_retrieve_vector_store_file( + vector_store_id=vector_store_id, + file_id=file_id, + ) + + async def openai_retrieve_vector_store_file_contents( + self, + vector_store_id: str, + file_id: str, + ) -> VectorStoreFileContentsResponse: + await self.assert_action_allowed("read", "vector_db", vector_store_id) + provider = await self.get_provider_impl(vector_store_id) + return await provider.openai_retrieve_vector_store_file_contents( + vector_store_id=vector_store_id, + file_id=file_id, + ) + + async def openai_update_vector_store_file( + self, + vector_store_id: str, + file_id: str, + attributes: dict[str, Any], + ) -> VectorStoreFileObject: + await self.assert_action_allowed("update", "vector_db", vector_store_id) + provider = await self.get_provider_impl(vector_store_id) + return await provider.openai_update_vector_store_file( + vector_store_id=vector_store_id, + file_id=file_id, + attributes=attributes, + ) + + async def openai_delete_vector_store_file( + self, + vector_store_id: str, + file_id: str, + ) -> VectorStoreFileDeleteResponse: + await self.assert_action_allowed("delete", "vector_db", vector_store_id) + provider = await self.get_provider_impl(vector_store_id) + return await provider.openai_delete_vector_store_file( + vector_store_id=vector_store_id, + file_id=file_id, + ) + + async def openai_create_vector_store_file_batch( + self, + vector_store_id: str, + file_ids: list[str], + attributes: dict[str, Any] | None = None, + chunking_strategy: Any | None = None, + ): + await self.assert_action_allowed("update", "vector_db", vector_store_id) + provider = await self.get_provider_impl(vector_store_id) + return await provider.openai_create_vector_store_file_batch( + vector_store_id=vector_store_id, + file_ids=file_ids, + attributes=attributes, + chunking_strategy=chunking_strategy, + ) + + async def openai_retrieve_vector_store_file_batch( + self, + batch_id: str, + vector_store_id: str, + ): + await self.assert_action_allowed("read", "vector_db", vector_store_id) + provider = await self.get_provider_impl(vector_store_id) + return await provider.openai_retrieve_vector_store_file_batch( + batch_id=batch_id, + vector_store_id=vector_store_id, + ) + + async def openai_list_files_in_vector_store_file_batch( + self, + batch_id: str, + vector_store_id: str, + after: str | None = None, + before: str | None = None, + filter: str | None = None, + limit: int | None = 20, + order: str | None = "desc", + ): + await self.assert_action_allowed("read", "vector_db", vector_store_id) + provider = await self.get_provider_impl(vector_store_id) + return await provider.openai_list_files_in_vector_store_file_batch( + batch_id=batch_id, + vector_store_id=vector_store_id, + after=after, + before=before, + filter=filter, + limit=limit, + order=order, + ) + + async def openai_cancel_vector_store_file_batch( + self, + batch_id: str, + vector_store_id: str, + ): + await self.assert_action_allowed("update", "vector_db", vector_store_id) + provider = await self.get_provider_impl(vector_store_id) + return await provider.openai_cancel_vector_store_file_batch( + batch_id=batch_id, + vector_store_id=vector_store_id, + ) diff --git a/llama_stack/core/server/auth_providers.py b/llama_stack/core/server/auth_providers.py index 05a21c8d4..0fe5f1558 100644 --- a/llama_stack/core/server/auth_providers.py +++ b/llama_stack/core/server/auth_providers.py @@ -72,13 +72,30 @@ class AuthProvider(ABC): def get_attributes_from_claims(claims: dict[str, str], mapping: dict[str, str]) -> dict[str, list[str]]: attributes: dict[str, list[str]] = {} for claim_key, attribute_key in mapping.items(): - if claim_key not in claims: + # First try dot notation for nested traversal (e.g., "resource_access.llamastack.roles") + # Then fall back to literal key with dots (e.g., "my.dotted.key") + claim: object = claims + keys = claim_key.split(".") + for key in keys: + if isinstance(claim, dict) and key in claim: + claim = claim[key] + else: + claim = None + break + + if claim is None and claim_key in claims: + # Fall back to checking if claim_key exists as a literal key + claim = claims[claim_key] + + if claim is None: continue - claim = claims[claim_key] + if isinstance(claim, list): values = claim - else: + elif isinstance(claim, str): values = claim.split() + else: + continue if attribute_key in attributes: attributes[attribute_key].extend(values) diff --git a/llama_stack/core/server/quota.py b/llama_stack/core/server/quota.py index 693f224c3..689f0e4c3 100644 --- a/llama_stack/core/server/quota.py +++ b/llama_stack/core/server/quota.py @@ -10,10 +10,10 @@ from datetime import UTC, datetime, timedelta from starlette.types import ASGIApp, Receive, Scope, Send +from llama_stack.core.storage.datatypes import KVStoreReference, StorageBackendType from llama_stack.log import get_logger from llama_stack.providers.utils.kvstore.api import KVStore -from llama_stack.providers.utils.kvstore.config import KVStoreConfig, SqliteKVStoreConfig -from llama_stack.providers.utils.kvstore.kvstore import kvstore_impl +from llama_stack.providers.utils.kvstore.kvstore import _KVSTORE_BACKENDS, kvstore_impl logger = get_logger(name=__name__, category="core::server") @@ -33,7 +33,7 @@ class QuotaMiddleware: def __init__( self, app: ASGIApp, - kv_config: KVStoreConfig, + kv_config: KVStoreReference, anonymous_max_requests: int, authenticated_max_requests: int, window_seconds: int = 86400, @@ -45,15 +45,15 @@ class QuotaMiddleware: self.authenticated_max_requests = authenticated_max_requests self.window_seconds = window_seconds - if isinstance(self.kv_config, SqliteKVStoreConfig): - logger.warning( - "QuotaMiddleware: Using SQLite backend. Expiry/TTL is not enforced; cleanup is manual. " - f"window_seconds={self.window_seconds}" - ) - async def _get_kv(self) -> KVStore: if self.kv is None: self.kv = await kvstore_impl(self.kv_config) + backend_config = _KVSTORE_BACKENDS.get(self.kv_config.backend) + if backend_config and backend_config.type == StorageBackendType.KV_SQLITE: + logger.warning( + "QuotaMiddleware: Using SQLite backend. Expiry/TTL is not enforced; cleanup is manual. " + f"window_seconds={self.window_seconds}" + ) return self.kv async def __call__(self, scope: Scope, receive: Receive, send: Send): diff --git a/llama_stack/core/stack.py b/llama_stack/core/stack.py index 733b55262..a2f7babd2 100644 --- a/llama_stack/core/stack.py +++ b/llama_stack/core/stack.py @@ -35,13 +35,23 @@ from llama_stack.apis.telemetry import Telemetry from llama_stack.apis.tools import RAGToolRuntime, ToolGroups, ToolRuntime from llama_stack.apis.vector_io import VectorIO from llama_stack.core.conversations.conversations import ConversationServiceConfig, ConversationServiceImpl -from llama_stack.core.datatypes import Provider, StackRunConfig +from llama_stack.core.datatypes import Provider, StackRunConfig, VectorStoresConfig from llama_stack.core.distribution import get_provider_registry from llama_stack.core.inspect import DistributionInspectConfig, DistributionInspectImpl from llama_stack.core.prompts.prompts import PromptServiceConfig, PromptServiceImpl from llama_stack.core.providers import ProviderImpl, ProviderImplConfig from llama_stack.core.resolver import ProviderRegistry, resolve_impls from llama_stack.core.routing_tables.common import CommonRoutingTableImpl +from llama_stack.core.storage.datatypes import ( + InferenceStoreReference, + KVStoreReference, + ServerStoresConfig, + SqliteKVStoreConfig, + SqliteSqlStoreConfig, + SqlStoreReference, + StorageBackendConfig, + StorageConfig, +) from llama_stack.core.store.registry import create_dist_registry from llama_stack.core.utils.dynamic import instantiate_class_type from llama_stack.log import get_logger @@ -98,30 +108,6 @@ REGISTRY_REFRESH_TASK = None TEST_RECORDING_CONTEXT = None -async def validate_default_embedding_model(impls: dict[Api, Any]): - """Validate that at most one embedding model is marked as default.""" - if Api.models not in impls: - return - - models_impl = impls[Api.models] - response = await models_impl.list_models() - models_list = response.data if hasattr(response, "data") else response - - default_embedding_models = [] - for model in models_list: - if model.model_type == "embedding" and model.metadata.get("default_configured") is True: - default_embedding_models.append(model.identifier) - - if len(default_embedding_models) > 1: - raise ValueError( - f"Multiple embedding models marked as default_configured=True: {default_embedding_models}. " - "Only one embedding model can be marked as default." - ) - - if default_embedding_models: - logger.info(f"Default embedding model configured: {default_embedding_models[0]}") - - async def register_resources(run_config: StackRunConfig, impls: dict[Api, Any]): for rsrc, api, register_method, list_method in RESOURCES: objects = getattr(run_config, rsrc) @@ -152,7 +138,41 @@ async def register_resources(run_config: StackRunConfig, impls: dict[Api, Any]): f"{rsrc.capitalize()}: {obj.identifier} served by {obj.provider_id}", ) - await validate_default_embedding_model(impls) + +async def validate_vector_stores_config(vector_stores_config: VectorStoresConfig | None, impls: dict[Api, Any]): + """Validate vector stores configuration.""" + if vector_stores_config is None: + return + + default_embedding_model = vector_stores_config.default_embedding_model + if default_embedding_model is None: + return + + provider_id = default_embedding_model.provider_id + model_id = default_embedding_model.model_id + default_model_id = f"{provider_id}/{model_id}" + + if Api.models not in impls: + raise ValueError(f"Models API is not available but vector_stores config requires model '{default_model_id}'") + + models_impl = impls[Api.models] + response = await models_impl.list_models() + models_list = {m.identifier: m for m in response.data if m.model_type == "embedding"} + + default_model = models_list.get(default_model_id) + if default_model is None: + raise ValueError(f"Embedding model '{default_model_id}' not found. Available embedding models: {models_list}") + + embedding_dimension = default_model.metadata.get("embedding_dimension") + if embedding_dimension is None: + raise ValueError(f"Embedding model '{default_model_id}' is missing 'embedding_dimension' in metadata") + + try: + int(embedding_dimension) + except ValueError as err: + raise ValueError(f"Embedding dimension '{embedding_dimension}' cannot be converted to an integer") from err + + logger.debug(f"Validated default embedding model: {default_model_id} (dimension: {embedding_dimension})") class EnvVarError(Exception): @@ -329,6 +349,25 @@ def add_internal_implementations(impls: dict[Api, Any], run_config: StackRunConf impls[Api.conversations] = conversations_impl +def _initialize_storage(run_config: StackRunConfig): + kv_backends: dict[str, StorageBackendConfig] = {} + sql_backends: dict[str, StorageBackendConfig] = {} + for backend_name, backend_config in run_config.storage.backends.items(): + type = backend_config.type.value + if type.startswith("kv_"): + kv_backends[backend_name] = backend_config + elif type.startswith("sql_"): + sql_backends[backend_name] = backend_config + else: + raise ValueError(f"Unknown storage backend type: {type}") + + from llama_stack.providers.utils.kvstore.kvstore import register_kvstore_backends + from llama_stack.providers.utils.sqlstore.sqlstore import register_sqlstore_backends + + register_kvstore_backends(kv_backends) + register_sqlstore_backends(sql_backends) + + class Stack: def __init__(self, run_config: StackRunConfig, provider_registry: ProviderRegistry | None = None): self.run_config = run_config @@ -347,7 +386,11 @@ class Stack: TEST_RECORDING_CONTEXT.__enter__() logger.info(f"API recording enabled: mode={os.environ.get('LLAMA_STACK_TEST_INFERENCE_MODE')}") - dist_registry, _ = await create_dist_registry(self.run_config.metadata_store, self.run_config.image_name) + _initialize_storage(self.run_config) + stores = self.run_config.storage.stores + if not stores.metadata: + raise ValueError("storage.stores.metadata must be configured with a kv_* backend") + dist_registry, _ = await create_dist_registry(stores.metadata, self.run_config.image_name) policy = self.run_config.server.auth.access_policy if self.run_config.server.auth else [] internal_impls = {} @@ -367,8 +410,8 @@ class Stack: await impls[Api.conversations].initialize() await register_resources(self.run_config, impls) - await refresh_registry_once(impls) + await validate_vector_stores_config(self.run_config.vector_stores, impls) self.impls = impls def create_registry_refresh_task(self): @@ -488,5 +531,16 @@ def run_config_from_adhoc_config_spec( image_name="distro-test", apis=list(provider_configs_by_api.keys()), providers=provider_configs_by_api, + storage=StorageConfig( + backends={ + "kv_default": SqliteKVStoreConfig(db_path=f"{distro_dir}/kvstore.db"), + "sql_default": SqliteSqlStoreConfig(db_path=f"{distro_dir}/sql_store.db"), + }, + stores=ServerStoresConfig( + metadata=KVStoreReference(backend="kv_default", namespace="registry"), + inference=InferenceStoreReference(backend="sql_default", table_name="inference_store"), + conversations=SqlStoreReference(backend="sql_default", table_name="openai_conversations"), + ), + ), ) return config diff --git a/llama_stack/core/storage/__init__.py b/llama_stack/core/storage/__init__.py new file mode 100644 index 000000000..756f351d8 --- /dev/null +++ b/llama_stack/core/storage/__init__.py @@ -0,0 +1,5 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. diff --git a/llama_stack/core/storage/datatypes.py b/llama_stack/core/storage/datatypes.py new file mode 100644 index 000000000..9df170e10 --- /dev/null +++ b/llama_stack/core/storage/datatypes.py @@ -0,0 +1,283 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +import re +from abc import abstractmethod +from enum import StrEnum +from pathlib import Path +from typing import Annotated, Literal + +from pydantic import BaseModel, Field, field_validator + + +class StorageBackendType(StrEnum): + KV_REDIS = "kv_redis" + KV_SQLITE = "kv_sqlite" + KV_POSTGRES = "kv_postgres" + KV_MONGODB = "kv_mongodb" + SQL_SQLITE = "sql_sqlite" + SQL_POSTGRES = "sql_postgres" + + +class CommonConfig(BaseModel): + namespace: str | None = Field( + default=None, + description="All keys will be prefixed with this namespace", + ) + + +class RedisKVStoreConfig(CommonConfig): + type: Literal[StorageBackendType.KV_REDIS] = StorageBackendType.KV_REDIS + host: str = "localhost" + port: int = 6379 + + @property + def url(self) -> str: + return f"redis://{self.host}:{self.port}" + + @classmethod + def pip_packages(cls) -> list[str]: + return ["redis"] + + @classmethod + def sample_run_config(cls): + return { + "type": StorageBackendType.KV_REDIS.value, + "host": "${env.REDIS_HOST:=localhost}", + "port": "${env.REDIS_PORT:=6379}", + } + + +class SqliteKVStoreConfig(CommonConfig): + type: Literal[StorageBackendType.KV_SQLITE] = StorageBackendType.KV_SQLITE + db_path: str = Field( + description="File path for the sqlite database", + ) + + @classmethod + def pip_packages(cls) -> list[str]: + return ["aiosqlite"] + + @classmethod + def sample_run_config(cls, __distro_dir__: str, db_name: str = "kvstore.db"): + return { + "type": StorageBackendType.KV_SQLITE.value, + "db_path": "${env.SQLITE_STORE_DIR:=" + __distro_dir__ + "}/" + db_name, + } + + +class PostgresKVStoreConfig(CommonConfig): + type: Literal[StorageBackendType.KV_POSTGRES] = StorageBackendType.KV_POSTGRES + host: str = "localhost" + port: int | str = 5432 + db: str = "llamastack" + user: str + password: str | None = None + ssl_mode: str | None = None + ca_cert_path: str | None = None + table_name: str = "llamastack_kvstore" + + @classmethod + def sample_run_config(cls, table_name: str = "llamastack_kvstore", **kwargs): + return { + "type": StorageBackendType.KV_POSTGRES.value, + "host": "${env.POSTGRES_HOST:=localhost}", + "port": "${env.POSTGRES_PORT:=5432}", + "db": "${env.POSTGRES_DB:=llamastack}", + "user": "${env.POSTGRES_USER:=llamastack}", + "password": "${env.POSTGRES_PASSWORD:=llamastack}", + "table_name": "${env.POSTGRES_TABLE_NAME:=" + table_name + "}", + } + + @classmethod + @field_validator("table_name") + def validate_table_name(cls, v: str) -> str: + # PostgreSQL identifiers rules: + # - Must start with a letter or underscore + # - Can contain letters, numbers, and underscores + # - Maximum length is 63 bytes + pattern = r"^[a-zA-Z_][a-zA-Z0-9_]*$" + if not re.match(pattern, v): + raise ValueError( + "Invalid table name. Must start with letter or underscore and contain only letters, numbers, and underscores" + ) + if len(v) > 63: + raise ValueError("Table name must be less than 63 characters") + return v + + @classmethod + def pip_packages(cls) -> list[str]: + return ["psycopg2-binary"] + + +class MongoDBKVStoreConfig(CommonConfig): + type: Literal[StorageBackendType.KV_MONGODB] = StorageBackendType.KV_MONGODB + host: str = "localhost" + port: int = 27017 + db: str = "llamastack" + user: str | None = None + password: str | None = None + collection_name: str = "llamastack_kvstore" + + @classmethod + def pip_packages(cls) -> list[str]: + return ["pymongo"] + + @classmethod + def sample_run_config(cls, collection_name: str = "llamastack_kvstore"): + return { + "type": StorageBackendType.KV_MONGODB.value, + "host": "${env.MONGODB_HOST:=localhost}", + "port": "${env.MONGODB_PORT:=5432}", + "db": "${env.MONGODB_DB}", + "user": "${env.MONGODB_USER}", + "password": "${env.MONGODB_PASSWORD}", + "collection_name": "${env.MONGODB_COLLECTION_NAME:=" + collection_name + "}", + } + + +class SqlAlchemySqlStoreConfig(BaseModel): + @property + @abstractmethod + def engine_str(self) -> str: ... + + # TODO: move this when we have a better way to specify dependencies with internal APIs + @classmethod + def pip_packages(cls) -> list[str]: + return ["sqlalchemy[asyncio]"] + + +class SqliteSqlStoreConfig(SqlAlchemySqlStoreConfig): + type: Literal[StorageBackendType.SQL_SQLITE] = StorageBackendType.SQL_SQLITE + db_path: str = Field( + description="Database path, e.g. ~/.llama/distributions/ollama/sqlstore.db", + ) + + @property + def engine_str(self) -> str: + return "sqlite+aiosqlite:///" + Path(self.db_path).expanduser().as_posix() + + @classmethod + def sample_run_config(cls, __distro_dir__: str, db_name: str = "sqlstore.db"): + return { + "type": StorageBackendType.SQL_SQLITE.value, + "db_path": "${env.SQLITE_STORE_DIR:=" + __distro_dir__ + "}/" + db_name, + } + + @classmethod + def pip_packages(cls) -> list[str]: + return super().pip_packages() + ["aiosqlite"] + + +class PostgresSqlStoreConfig(SqlAlchemySqlStoreConfig): + type: Literal[StorageBackendType.SQL_POSTGRES] = StorageBackendType.SQL_POSTGRES + host: str = "localhost" + port: int | str = 5432 + db: str = "llamastack" + user: str + password: str | None = None + + @property + def engine_str(self) -> str: + return f"postgresql+asyncpg://{self.user}:{self.password}@{self.host}:{self.port}/{self.db}" + + @classmethod + def pip_packages(cls) -> list[str]: + return super().pip_packages() + ["asyncpg"] + + @classmethod + def sample_run_config(cls, **kwargs): + return { + "type": StorageBackendType.SQL_POSTGRES.value, + "host": "${env.POSTGRES_HOST:=localhost}", + "port": "${env.POSTGRES_PORT:=5432}", + "db": "${env.POSTGRES_DB:=llamastack}", + "user": "${env.POSTGRES_USER:=llamastack}", + "password": "${env.POSTGRES_PASSWORD:=llamastack}", + } + + +# reference = (backend_name, table_name) +class SqlStoreReference(BaseModel): + """A reference to a 'SQL-like' persistent store. A table name must be provided.""" + + table_name: str = Field( + description="Name of the table to use for the SqlStore", + ) + + backend: str = Field( + description="Name of backend from storage.backends", + ) + + +# reference = (backend_name, namespace) +class KVStoreReference(BaseModel): + """A reference to a 'key-value' persistent store. A namespace must be provided.""" + + namespace: str = Field( + description="Key prefix for KVStore backends", + ) + + backend: str = Field( + description="Name of backend from storage.backends", + ) + + +StorageBackendConfig = Annotated[ + RedisKVStoreConfig + | SqliteKVStoreConfig + | PostgresKVStoreConfig + | MongoDBKVStoreConfig + | SqliteSqlStoreConfig + | PostgresSqlStoreConfig, + Field(discriminator="type"), +] + + +class InferenceStoreReference(SqlStoreReference): + """Inference store configuration with queue tuning.""" + + max_write_queue_size: int = Field( + default=10000, + description="Max queued writes for inference store", + ) + num_writers: int = Field( + default=4, + description="Number of concurrent background writers", + ) + + +class ResponsesStoreReference(InferenceStoreReference): + """Responses store configuration with queue tuning.""" + + +class ServerStoresConfig(BaseModel): + metadata: KVStoreReference | None = Field( + default=None, + description="Metadata store configuration (uses KV backend)", + ) + inference: InferenceStoreReference | None = Field( + default=None, + description="Inference store configuration (uses SQL backend)", + ) + conversations: SqlStoreReference | None = Field( + default=None, + description="Conversations store configuration (uses SQL backend)", + ) + responses: ResponsesStoreReference | None = Field( + default=None, + description="Responses store configuration (uses SQL backend)", + ) + + +class StorageConfig(BaseModel): + backends: dict[str, StorageBackendConfig] = Field( + description="Named backend configurations (e.g., 'default', 'cache')", + ) + stores: ServerStoresConfig = Field( + default_factory=lambda: ServerStoresConfig(), + description="Named references to storage backends used by the stack core", + ) diff --git a/llama_stack/core/store/registry.py b/llama_stack/core/store/registry.py index 04581bab5..6ff9e575b 100644 --- a/llama_stack/core/store/registry.py +++ b/llama_stack/core/store/registry.py @@ -11,10 +11,9 @@ from typing import Protocol import pydantic from llama_stack.core.datatypes import RoutableObjectWithProvider -from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR +from llama_stack.core.storage.datatypes import KVStoreReference from llama_stack.log import get_logger from llama_stack.providers.utils.kvstore import KVStore, kvstore_impl -from llama_stack.providers.utils.kvstore.config import KVStoreConfig, SqliteKVStoreConfig logger = get_logger(__name__, category="core::registry") @@ -191,16 +190,10 @@ class CachedDiskDistributionRegistry(DiskDistributionRegistry): async def create_dist_registry( - metadata_store: KVStoreConfig | None, - image_name: str, + metadata_store: KVStoreReference, image_name: str ) -> tuple[CachedDiskDistributionRegistry, KVStore]: # instantiate kvstore for storing and retrieving distribution metadata - if metadata_store: - dist_kvstore = await kvstore_impl(metadata_store) - else: - dist_kvstore = await kvstore_impl( - SqliteKVStoreConfig(db_path=(DISTRIBS_BASE_DIR / image_name / "kvstore.db").as_posix()) - ) + dist_kvstore = await kvstore_impl(metadata_store) dist_registry = CachedDiskDistributionRegistry(dist_kvstore) await dist_registry.initialize() return dist_registry, dist_kvstore diff --git a/llama_stack/distributions/ci-tests/build.yaml b/llama_stack/distributions/ci-tests/build.yaml index 191d0ae59..c01e415a9 100644 --- a/llama_stack/distributions/ci-tests/build.yaml +++ b/llama_stack/distributions/ci-tests/build.yaml @@ -25,6 +25,8 @@ distribution_spec: - provider_type: inline::milvus - provider_type: remote::chromadb - provider_type: remote::pgvector + - provider_type: remote::qdrant + - provider_type: remote::weaviate files: - provider_type: inline::localfs safety: diff --git a/llama_stack/distributions/ci-tests/run.yaml b/llama_stack/distributions/ci-tests/run.yaml index a6a6b7c0d..1653dc9bd 100644 --- a/llama_stack/distributions/ci-tests/run.yaml +++ b/llama_stack/distributions/ci-tests/run.yaml @@ -93,30 +93,30 @@ providers: - provider_id: faiss provider_type: inline::faiss config: - kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/faiss_store.db + persistence: + namespace: vector_io::faiss + backend: kv_default - provider_id: sqlite-vec provider_type: inline::sqlite-vec config: db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/sqlite_vec.db - kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/sqlite_vec_registry.db + persistence: + namespace: vector_io::sqlite_vec + backend: kv_default - provider_id: ${env.MILVUS_URL:+milvus} provider_type: inline::milvus config: db_path: ${env.MILVUS_DB_PATH:=~/.llama/distributions/ci-tests}/milvus.db - kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/milvus_registry.db + persistence: + namespace: vector_io::milvus + backend: kv_default - provider_id: ${env.CHROMADB_URL:+chromadb} provider_type: remote::chromadb config: url: ${env.CHROMADB_URL:=} - kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests/}/chroma_remote_registry.db + persistence: + namespace: vector_io::chroma_remote + backend: kv_default - provider_id: ${env.PGVECTOR_DB:+pgvector} provider_type: remote::pgvector config: @@ -125,17 +125,32 @@ providers: db: ${env.PGVECTOR_DB:=} user: ${env.PGVECTOR_USER:=} password: ${env.PGVECTOR_PASSWORD:=} - kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/pgvector_registry.db + persistence: + namespace: vector_io::pgvector + backend: kv_default + - provider_id: ${env.QDRANT_URL:+qdrant} + provider_type: remote::qdrant + config: + api_key: ${env.QDRANT_API_KEY:=} + persistence: + namespace: vector_io::qdrant_remote + backend: kv_default + - provider_id: ${env.WEAVIATE_CLUSTER_URL:+weaviate} + provider_type: remote::weaviate + config: + weaviate_api_key: null + weaviate_cluster_url: ${env.WEAVIATE_CLUSTER_URL:=localhost:8080} + persistence: + namespace: vector_io::weaviate + backend: kv_default files: - provider_id: meta-reference-files provider_type: inline::localfs config: storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/ci-tests/files} metadata_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/files_metadata.db + table_name: files_metadata + backend: sql_default safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -147,12 +162,15 @@ providers: - provider_id: meta-reference provider_type: inline::meta-reference config: - persistence_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/agents_store.db - responses_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/responses_store.db + persistence: + agent_state: + namespace: agents + backend: kv_default + responses: + table_name: responses + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 post_training: - provider_id: torchtune-cpu provider_type: inline::torchtune-cpu @@ -163,21 +181,21 @@ providers: provider_type: inline::meta-reference config: kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/meta_reference_eval.db + namespace: eval + backend: kv_default datasetio: - provider_id: huggingface provider_type: remote::huggingface config: kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/huggingface_datasetio.db + namespace: datasetio::huggingface + backend: kv_default - provider_id: localfs provider_type: inline::localfs config: kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/localfs_datasetio.db + namespace: datasetio::localfs + backend: kv_default scoring: - provider_id: basic provider_type: inline::basic @@ -207,17 +225,28 @@ providers: provider_type: inline::reference config: kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/batches.db -metadata_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/registry.db -inference_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/inference_store.db -conversations_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/conversations.db + namespace: batches + backend: kv_default +storage: + backends: + kv_default: + type: kv_sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/kvstore.db + sql_default: + type: sql_sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/sql_store.db + stores: + metadata: + namespace: registry + backend: kv_default + inference: + table_name: inference_store + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 + conversations: + table_name: openai_conversations + backend: sql_default models: [] shields: - shield_id: llama-guard @@ -239,3 +268,8 @@ server: port: 8321 telemetry: enabled: true +vector_stores: + default_provider_id: faiss + default_embedding_model: + provider_id: sentence-transformers + model_id: nomic-ai/nomic-embed-text-v1.5 diff --git a/llama_stack/distributions/dell/run-with-safety.yaml b/llama_stack/distributions/dell/run-with-safety.yaml index 5da3cf511..3130285b9 100644 --- a/llama_stack/distributions/dell/run-with-safety.yaml +++ b/llama_stack/distributions/dell/run-with-safety.yaml @@ -26,9 +26,9 @@ providers: provider_type: remote::chromadb config: url: ${env.CHROMADB_URL:=} - kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell/}/chroma_remote_registry.db + persistence: + namespace: vector_io::chroma_remote + backend: kv_default safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -38,32 +38,35 @@ providers: - provider_id: meta-reference provider_type: inline::meta-reference config: - persistence_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/agents_store.db - responses_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/responses_store.db + persistence: + agent_state: + namespace: agents + backend: kv_default + responses: + table_name: responses + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 eval: - provider_id: meta-reference provider_type: inline::meta-reference config: kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/meta_reference_eval.db + namespace: eval + backend: kv_default datasetio: - provider_id: huggingface provider_type: remote::huggingface config: kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/huggingface_datasetio.db + namespace: datasetio::huggingface + backend: kv_default - provider_id: localfs provider_type: inline::localfs config: kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/localfs_datasetio.db + namespace: datasetio::localfs + backend: kv_default scoring: - provider_id: basic provider_type: inline::basic @@ -86,15 +89,26 @@ providers: max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime -metadata_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/registry.db -inference_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/inference_store.db -conversations_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/conversations.db +storage: + backends: + kv_default: + type: kv_sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/kvstore.db + sql_default: + type: sql_sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/sql_store.db + stores: + metadata: + namespace: registry + backend: kv_default + inference: + table_name: inference_store + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 + conversations: + table_name: openai_conversations + backend: sql_default models: - metadata: {} model_id: ${env.INFERENCE_MODEL} diff --git a/llama_stack/distributions/dell/run.yaml b/llama_stack/distributions/dell/run.yaml index ac0fdc0fa..af1a96a21 100644 --- a/llama_stack/distributions/dell/run.yaml +++ b/llama_stack/distributions/dell/run.yaml @@ -22,9 +22,9 @@ providers: provider_type: remote::chromadb config: url: ${env.CHROMADB_URL:=} - kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell/}/chroma_remote_registry.db + persistence: + namespace: vector_io::chroma_remote + backend: kv_default safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -34,32 +34,35 @@ providers: - provider_id: meta-reference provider_type: inline::meta-reference config: - persistence_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/agents_store.db - responses_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/responses_store.db + persistence: + agent_state: + namespace: agents + backend: kv_default + responses: + table_name: responses + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 eval: - provider_id: meta-reference provider_type: inline::meta-reference config: kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/meta_reference_eval.db + namespace: eval + backend: kv_default datasetio: - provider_id: huggingface provider_type: remote::huggingface config: kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/huggingface_datasetio.db + namespace: datasetio::huggingface + backend: kv_default - provider_id: localfs provider_type: inline::localfs config: kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/localfs_datasetio.db + namespace: datasetio::localfs + backend: kv_default scoring: - provider_id: basic provider_type: inline::basic @@ -82,15 +85,26 @@ providers: max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime -metadata_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/registry.db -inference_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/inference_store.db -conversations_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/conversations.db +storage: + backends: + kv_default: + type: kv_sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/kvstore.db + sql_default: + type: sql_sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/sql_store.db + stores: + metadata: + namespace: registry + backend: kv_default + inference: + table_name: inference_store + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 + conversations: + table_name: openai_conversations + backend: sql_default models: - metadata: {} model_id: ${env.INFERENCE_MODEL} diff --git a/llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml b/llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml index 874c5050f..b43d1ff19 100644 --- a/llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +++ b/llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml @@ -37,9 +37,9 @@ providers: - provider_id: faiss provider_type: inline::faiss config: - kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/faiss_store.db + persistence: + namespace: vector_io::faiss + backend: kv_default safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -49,32 +49,35 @@ providers: - provider_id: meta-reference provider_type: inline::meta-reference config: - persistence_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/agents_store.db - responses_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/responses_store.db + persistence: + agent_state: + namespace: agents + backend: kv_default + responses: + table_name: responses + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 eval: - provider_id: meta-reference provider_type: inline::meta-reference config: kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/meta_reference_eval.db + namespace: eval + backend: kv_default datasetio: - provider_id: huggingface provider_type: remote::huggingface config: kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/huggingface_datasetio.db + namespace: datasetio::huggingface + backend: kv_default - provider_id: localfs provider_type: inline::localfs config: kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/localfs_datasetio.db + namespace: datasetio::localfs + backend: kv_default scoring: - provider_id: basic provider_type: inline::basic @@ -99,15 +102,26 @@ providers: provider_type: inline::rag-runtime - provider_id: model-context-protocol provider_type: remote::model-context-protocol -metadata_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/registry.db -inference_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/inference_store.db -conversations_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/conversations.db +storage: + backends: + kv_default: + type: kv_sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/kvstore.db + sql_default: + type: sql_sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/sql_store.db + stores: + metadata: + namespace: registry + backend: kv_default + inference: + table_name: inference_store + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 + conversations: + table_name: openai_conversations + backend: sql_default models: - metadata: {} model_id: ${env.INFERENCE_MODEL} diff --git a/llama_stack/distributions/meta-reference-gpu/run.yaml b/llama_stack/distributions/meta-reference-gpu/run.yaml index 50553d2c7..59e2d8129 100644 --- a/llama_stack/distributions/meta-reference-gpu/run.yaml +++ b/llama_stack/distributions/meta-reference-gpu/run.yaml @@ -27,9 +27,9 @@ providers: - provider_id: faiss provider_type: inline::faiss config: - kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/faiss_store.db + persistence: + namespace: vector_io::faiss + backend: kv_default safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -39,32 +39,35 @@ providers: - provider_id: meta-reference provider_type: inline::meta-reference config: - persistence_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/agents_store.db - responses_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/responses_store.db + persistence: + agent_state: + namespace: agents + backend: kv_default + responses: + table_name: responses + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 eval: - provider_id: meta-reference provider_type: inline::meta-reference config: kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/meta_reference_eval.db + namespace: eval + backend: kv_default datasetio: - provider_id: huggingface provider_type: remote::huggingface config: kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/huggingface_datasetio.db + namespace: datasetio::huggingface + backend: kv_default - provider_id: localfs provider_type: inline::localfs config: kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/localfs_datasetio.db + namespace: datasetio::localfs + backend: kv_default scoring: - provider_id: basic provider_type: inline::basic @@ -89,15 +92,26 @@ providers: provider_type: inline::rag-runtime - provider_id: model-context-protocol provider_type: remote::model-context-protocol -metadata_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/registry.db -inference_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/inference_store.db -conversations_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/conversations.db +storage: + backends: + kv_default: + type: kv_sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/kvstore.db + sql_default: + type: sql_sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/sql_store.db + stores: + metadata: + namespace: registry + backend: kv_default + inference: + table_name: inference_store + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 + conversations: + table_name: openai_conversations + backend: sql_default models: - metadata: {} model_id: ${env.INFERENCE_MODEL} diff --git a/llama_stack/distributions/nvidia/run-with-safety.yaml b/llama_stack/distributions/nvidia/run-with-safety.yaml index e0482f67d..e06787d0b 100644 --- a/llama_stack/distributions/nvidia/run-with-safety.yaml +++ b/llama_stack/distributions/nvidia/run-with-safety.yaml @@ -28,9 +28,9 @@ providers: - provider_id: faiss provider_type: inline::faiss config: - kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/faiss_store.db + persistence: + namespace: vector_io::faiss + backend: kv_default safety: - provider_id: nvidia provider_type: remote::nvidia @@ -41,12 +41,15 @@ providers: - provider_id: meta-reference provider_type: inline::meta-reference config: - persistence_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/agents_store.db - responses_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/responses_store.db + persistence: + agent_state: + namespace: agents + backend: kv_default + responses: + table_name: responses + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 eval: - provider_id: nvidia provider_type: remote::nvidia @@ -65,8 +68,8 @@ providers: provider_type: inline::localfs config: kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/localfs_datasetio.db + namespace: datasetio::localfs + backend: kv_default - provider_id: nvidia provider_type: remote::nvidia config: @@ -86,17 +89,28 @@ providers: config: storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/nvidia/files} metadata_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/files_metadata.db -metadata_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/registry.db -inference_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/inference_store.db -conversations_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/conversations.db + table_name: files_metadata + backend: sql_default +storage: + backends: + kv_default: + type: kv_sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/kvstore.db + sql_default: + type: sql_sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/sql_store.db + stores: + metadata: + namespace: registry + backend: kv_default + inference: + table_name: inference_store + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 + conversations: + table_name: openai_conversations + backend: sql_default models: - metadata: {} model_id: ${env.INFERENCE_MODEL} diff --git a/llama_stack/distributions/nvidia/run.yaml b/llama_stack/distributions/nvidia/run.yaml index 950782eed..85e0743e4 100644 --- a/llama_stack/distributions/nvidia/run.yaml +++ b/llama_stack/distributions/nvidia/run.yaml @@ -23,9 +23,9 @@ providers: - provider_id: faiss provider_type: inline::faiss config: - kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/faiss_store.db + persistence: + namespace: vector_io::faiss + backend: kv_default safety: - provider_id: nvidia provider_type: remote::nvidia @@ -36,12 +36,15 @@ providers: - provider_id: meta-reference provider_type: inline::meta-reference config: - persistence_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/agents_store.db - responses_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/responses_store.db + persistence: + agent_state: + namespace: agents + backend: kv_default + responses: + table_name: responses + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 eval: - provider_id: nvidia provider_type: remote::nvidia @@ -75,17 +78,28 @@ providers: config: storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/nvidia/files} metadata_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/files_metadata.db -metadata_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/registry.db -inference_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/inference_store.db -conversations_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/conversations.db + table_name: files_metadata + backend: sql_default +storage: + backends: + kv_default: + type: kv_sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/kvstore.db + sql_default: + type: sql_sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/sql_store.db + stores: + metadata: + namespace: registry + backend: kv_default + inference: + table_name: inference_store + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 + conversations: + table_name: openai_conversations + backend: sql_default models: [] shields: [] vector_dbs: [] diff --git a/llama_stack/distributions/open-benchmark/run.yaml b/llama_stack/distributions/open-benchmark/run.yaml index a738887b4..2c6936bfc 100644 --- a/llama_stack/distributions/open-benchmark/run.yaml +++ b/llama_stack/distributions/open-benchmark/run.yaml @@ -39,16 +39,16 @@ providers: provider_type: inline::sqlite-vec config: db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/sqlite_vec.db - kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/sqlite_vec_registry.db + persistence: + namespace: vector_io::sqlite_vec + backend: kv_default - provider_id: ${env.ENABLE_CHROMADB:+chromadb} provider_type: remote::chromadb config: url: ${env.CHROMADB_URL:=} - kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/chroma_remote_registry.db + persistence: + namespace: vector_io::chroma_remote + backend: kv_default - provider_id: ${env.ENABLE_PGVECTOR:+pgvector} provider_type: remote::pgvector config: @@ -57,9 +57,9 @@ providers: db: ${env.PGVECTOR_DB:=} user: ${env.PGVECTOR_USER:=} password: ${env.PGVECTOR_PASSWORD:=} - kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/pgvector_registry.db + persistence: + namespace: vector_io::pgvector + backend: kv_default safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -69,32 +69,35 @@ providers: - provider_id: meta-reference provider_type: inline::meta-reference config: - persistence_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/agents_store.db - responses_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/responses_store.db + persistence: + agent_state: + namespace: agents + backend: kv_default + responses: + table_name: responses + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 eval: - provider_id: meta-reference provider_type: inline::meta-reference config: kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/meta_reference_eval.db + namespace: eval + backend: kv_default datasetio: - provider_id: huggingface provider_type: remote::huggingface config: kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/huggingface_datasetio.db + namespace: datasetio::huggingface + backend: kv_default - provider_id: localfs provider_type: inline::localfs config: kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/localfs_datasetio.db + namespace: datasetio::localfs + backend: kv_default scoring: - provider_id: basic provider_type: inline::basic @@ -119,15 +122,26 @@ providers: provider_type: inline::rag-runtime - provider_id: model-context-protocol provider_type: remote::model-context-protocol -metadata_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/registry.db -inference_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/inference_store.db -conversations_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/conversations.db +storage: + backends: + kv_default: + type: kv_sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/kvstore.db + sql_default: + type: sql_sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/sql_store.db + stores: + metadata: + namespace: registry + backend: kv_default + inference: + table_name: inference_store + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 + conversations: + table_name: openai_conversations + backend: sql_default models: - metadata: {} model_id: gpt-4o diff --git a/llama_stack/distributions/postgres-demo/postgres_demo.py b/llama_stack/distributions/postgres-demo/postgres_demo.py index 1f3e88b3b..876370ef3 100644 --- a/llama_stack/distributions/postgres-demo/postgres_demo.py +++ b/llama_stack/distributions/postgres-demo/postgres_demo.py @@ -91,7 +91,6 @@ def get_distribution_template() -> DistributionTemplate: "embedding_dimension": 768, }, ) - postgres_config = PostgresSqlStoreConfig.sample_run_config() return DistributionTemplate( name=name, distro_type="self_hosted", @@ -105,22 +104,16 @@ def get_distribution_template() -> DistributionTemplate: provider_overrides={ "inference": inference_providers + [embedding_provider], "vector_io": vector_io_providers, - "agents": [ - Provider( - provider_id="meta-reference", - provider_type="inline::meta-reference", - config=dict( - persistence_store=postgres_config, - responses_store=postgres_config, - ), - ) - ], }, default_models=default_models + [embedding_model], default_tool_groups=default_tool_groups, default_shields=[ShieldInput(shield_id="meta-llama/Llama-Guard-3-8B")], - metadata_store=PostgresKVStoreConfig.sample_run_config(), - inference_store=postgres_config, + storage_backends={ + "kv_default": PostgresKVStoreConfig.sample_run_config( + table_name="llamastack_kvstore", + ), + "sql_default": PostgresSqlStoreConfig.sample_run_config(), + }, ), }, run_config_env_vars={ diff --git a/llama_stack/distributions/postgres-demo/run.yaml b/llama_stack/distributions/postgres-demo/run.yaml index 62faf3f62..9556b1287 100644 --- a/llama_stack/distributions/postgres-demo/run.yaml +++ b/llama_stack/distributions/postgres-demo/run.yaml @@ -22,9 +22,9 @@ providers: provider_type: remote::chromadb config: url: ${env.CHROMADB_URL:=} - kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/postgres-demo}/chroma_remote_registry.db + persistence: + namespace: vector_io::chroma_remote + backend: kv_default safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -34,20 +34,15 @@ providers: - provider_id: meta-reference provider_type: inline::meta-reference config: - persistence_store: - type: postgres - host: ${env.POSTGRES_HOST:=localhost} - port: ${env.POSTGRES_PORT:=5432} - db: ${env.POSTGRES_DB:=llamastack} - user: ${env.POSTGRES_USER:=llamastack} - password: ${env.POSTGRES_PASSWORD:=llamastack} - responses_store: - type: postgres - host: ${env.POSTGRES_HOST:=localhost} - port: ${env.POSTGRES_PORT:=5432} - db: ${env.POSTGRES_DB:=llamastack} - user: ${env.POSTGRES_USER:=llamastack} - password: ${env.POSTGRES_PASSWORD:=llamastack} + persistence: + agent_state: + namespace: agents + backend: kv_default + responses: + table_name: responses + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 tool_runtime: - provider_id: brave-search provider_type: remote::brave-search @@ -63,24 +58,35 @@ providers: provider_type: inline::rag-runtime - provider_id: model-context-protocol provider_type: remote::model-context-protocol -metadata_store: - type: postgres - host: ${env.POSTGRES_HOST:=localhost} - port: ${env.POSTGRES_PORT:=5432} - db: ${env.POSTGRES_DB:=llamastack} - user: ${env.POSTGRES_USER:=llamastack} - password: ${env.POSTGRES_PASSWORD:=llamastack} - table_name: ${env.POSTGRES_TABLE_NAME:=llamastack_kvstore} -inference_store: - type: postgres - host: ${env.POSTGRES_HOST:=localhost} - port: ${env.POSTGRES_PORT:=5432} - db: ${env.POSTGRES_DB:=llamastack} - user: ${env.POSTGRES_USER:=llamastack} - password: ${env.POSTGRES_PASSWORD:=llamastack} -conversations_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/postgres-demo}/conversations.db +storage: + backends: + kv_default: + type: kv_postgres + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} + table_name: ${env.POSTGRES_TABLE_NAME:=llamastack_kvstore} + sql_default: + type: sql_postgres + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} + stores: + metadata: + namespace: registry + backend: kv_default + inference: + table_name: inference_store + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 + conversations: + table_name: openai_conversations + backend: sql_default models: - metadata: {} model_id: ${env.INFERENCE_MODEL} diff --git a/llama_stack/distributions/starter-gpu/build.yaml b/llama_stack/distributions/starter-gpu/build.yaml index 943c6134d..b2e2a0c85 100644 --- a/llama_stack/distributions/starter-gpu/build.yaml +++ b/llama_stack/distributions/starter-gpu/build.yaml @@ -26,6 +26,8 @@ distribution_spec: - provider_type: inline::milvus - provider_type: remote::chromadb - provider_type: remote::pgvector + - provider_type: remote::qdrant + - provider_type: remote::weaviate files: - provider_type: inline::localfs safety: diff --git a/llama_stack/distributions/starter-gpu/run.yaml b/llama_stack/distributions/starter-gpu/run.yaml index 370d4b516..81f564779 100644 --- a/llama_stack/distributions/starter-gpu/run.yaml +++ b/llama_stack/distributions/starter-gpu/run.yaml @@ -93,30 +93,30 @@ providers: - provider_id: faiss provider_type: inline::faiss config: - kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/faiss_store.db + persistence: + namespace: vector_io::faiss + backend: kv_default - provider_id: sqlite-vec provider_type: inline::sqlite-vec config: db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/sqlite_vec.db - kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/sqlite_vec_registry.db + persistence: + namespace: vector_io::sqlite_vec + backend: kv_default - provider_id: ${env.MILVUS_URL:+milvus} provider_type: inline::milvus config: db_path: ${env.MILVUS_DB_PATH:=~/.llama/distributions/starter-gpu}/milvus.db - kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/milvus_registry.db + persistence: + namespace: vector_io::milvus + backend: kv_default - provider_id: ${env.CHROMADB_URL:+chromadb} provider_type: remote::chromadb config: url: ${env.CHROMADB_URL:=} - kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu/}/chroma_remote_registry.db + persistence: + namespace: vector_io::chroma_remote + backend: kv_default - provider_id: ${env.PGVECTOR_DB:+pgvector} provider_type: remote::pgvector config: @@ -125,17 +125,32 @@ providers: db: ${env.PGVECTOR_DB:=} user: ${env.PGVECTOR_USER:=} password: ${env.PGVECTOR_PASSWORD:=} - kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/pgvector_registry.db + persistence: + namespace: vector_io::pgvector + backend: kv_default + - provider_id: ${env.QDRANT_URL:+qdrant} + provider_type: remote::qdrant + config: + api_key: ${env.QDRANT_API_KEY:=} + persistence: + namespace: vector_io::qdrant_remote + backend: kv_default + - provider_id: ${env.WEAVIATE_CLUSTER_URL:+weaviate} + provider_type: remote::weaviate + config: + weaviate_api_key: null + weaviate_cluster_url: ${env.WEAVIATE_CLUSTER_URL:=localhost:8080} + persistence: + namespace: vector_io::weaviate + backend: kv_default files: - provider_id: meta-reference-files provider_type: inline::localfs config: storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/starter-gpu/files} metadata_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/files_metadata.db + table_name: files_metadata + backend: sql_default safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -147,12 +162,15 @@ providers: - provider_id: meta-reference provider_type: inline::meta-reference config: - persistence_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/agents_store.db - responses_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/responses_store.db + persistence: + agent_state: + namespace: agents + backend: kv_default + responses: + table_name: responses + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 post_training: - provider_id: huggingface-gpu provider_type: inline::huggingface-gpu @@ -166,21 +184,21 @@ providers: provider_type: inline::meta-reference config: kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/meta_reference_eval.db + namespace: eval + backend: kv_default datasetio: - provider_id: huggingface provider_type: remote::huggingface config: kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/huggingface_datasetio.db + namespace: datasetio::huggingface + backend: kv_default - provider_id: localfs provider_type: inline::localfs config: kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/localfs_datasetio.db + namespace: datasetio::localfs + backend: kv_default scoring: - provider_id: basic provider_type: inline::basic @@ -210,17 +228,28 @@ providers: provider_type: inline::reference config: kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/batches.db -metadata_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/registry.db -inference_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/inference_store.db -conversations_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/conversations.db + namespace: batches + backend: kv_default +storage: + backends: + kv_default: + type: kv_sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/kvstore.db + sql_default: + type: sql_sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/sql_store.db + stores: + metadata: + namespace: registry + backend: kv_default + inference: + table_name: inference_store + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 + conversations: + table_name: openai_conversations + backend: sql_default models: [] shields: - shield_id: llama-guard @@ -242,3 +271,8 @@ server: port: 8321 telemetry: enabled: true +vector_stores: + default_provider_id: faiss + default_embedding_model: + provider_id: sentence-transformers + model_id: nomic-ai/nomic-embed-text-v1.5 diff --git a/llama_stack/distributions/starter/build.yaml b/llama_stack/distributions/starter/build.yaml index c2719d50d..baa80ef3e 100644 --- a/llama_stack/distributions/starter/build.yaml +++ b/llama_stack/distributions/starter/build.yaml @@ -26,6 +26,8 @@ distribution_spec: - provider_type: inline::milvus - provider_type: remote::chromadb - provider_type: remote::pgvector + - provider_type: remote::qdrant + - provider_type: remote::weaviate files: - provider_type: inline::localfs safety: diff --git a/llama_stack/distributions/starter/run.yaml b/llama_stack/distributions/starter/run.yaml index 2f4e7f350..dc611a446 100644 --- a/llama_stack/distributions/starter/run.yaml +++ b/llama_stack/distributions/starter/run.yaml @@ -93,30 +93,30 @@ providers: - provider_id: faiss provider_type: inline::faiss config: - kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/faiss_store.db + persistence: + namespace: vector_io::faiss + backend: kv_default - provider_id: sqlite-vec provider_type: inline::sqlite-vec config: db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/sqlite_vec.db - kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/sqlite_vec_registry.db + persistence: + namespace: vector_io::sqlite_vec + backend: kv_default - provider_id: ${env.MILVUS_URL:+milvus} provider_type: inline::milvus config: db_path: ${env.MILVUS_DB_PATH:=~/.llama/distributions/starter}/milvus.db - kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/milvus_registry.db + persistence: + namespace: vector_io::milvus + backend: kv_default - provider_id: ${env.CHROMADB_URL:+chromadb} provider_type: remote::chromadb config: url: ${env.CHROMADB_URL:=} - kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter/}/chroma_remote_registry.db + persistence: + namespace: vector_io::chroma_remote + backend: kv_default - provider_id: ${env.PGVECTOR_DB:+pgvector} provider_type: remote::pgvector config: @@ -125,17 +125,32 @@ providers: db: ${env.PGVECTOR_DB:=} user: ${env.PGVECTOR_USER:=} password: ${env.PGVECTOR_PASSWORD:=} - kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/pgvector_registry.db + persistence: + namespace: vector_io::pgvector + backend: kv_default + - provider_id: ${env.QDRANT_URL:+qdrant} + provider_type: remote::qdrant + config: + api_key: ${env.QDRANT_API_KEY:=} + persistence: + namespace: vector_io::qdrant_remote + backend: kv_default + - provider_id: ${env.WEAVIATE_CLUSTER_URL:+weaviate} + provider_type: remote::weaviate + config: + weaviate_api_key: null + weaviate_cluster_url: ${env.WEAVIATE_CLUSTER_URL:=localhost:8080} + persistence: + namespace: vector_io::weaviate + backend: kv_default files: - provider_id: meta-reference-files provider_type: inline::localfs config: storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/starter/files} metadata_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/files_metadata.db + table_name: files_metadata + backend: sql_default safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -147,12 +162,15 @@ providers: - provider_id: meta-reference provider_type: inline::meta-reference config: - persistence_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/agents_store.db - responses_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/responses_store.db + persistence: + agent_state: + namespace: agents + backend: kv_default + responses: + table_name: responses + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 post_training: - provider_id: torchtune-cpu provider_type: inline::torchtune-cpu @@ -163,21 +181,21 @@ providers: provider_type: inline::meta-reference config: kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/meta_reference_eval.db + namespace: eval + backend: kv_default datasetio: - provider_id: huggingface provider_type: remote::huggingface config: kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/huggingface_datasetio.db + namespace: datasetio::huggingface + backend: kv_default - provider_id: localfs provider_type: inline::localfs config: kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/localfs_datasetio.db + namespace: datasetio::localfs + backend: kv_default scoring: - provider_id: basic provider_type: inline::basic @@ -207,17 +225,28 @@ providers: provider_type: inline::reference config: kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/batches.db -metadata_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/registry.db -inference_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/inference_store.db -conversations_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/conversations.db + namespace: batches + backend: kv_default +storage: + backends: + kv_default: + type: kv_sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/kvstore.db + sql_default: + type: sql_sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/sql_store.db + stores: + metadata: + namespace: registry + backend: kv_default + inference: + table_name: inference_store + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 + conversations: + table_name: openai_conversations + backend: sql_default models: [] shields: - shield_id: llama-guard @@ -239,3 +268,8 @@ server: port: 8321 telemetry: enabled: true +vector_stores: + default_provider_id: faiss + default_embedding_model: + provider_id: sentence-transformers + model_id: nomic-ai/nomic-embed-text-v1.5 diff --git a/llama_stack/distributions/starter/starter.py b/llama_stack/distributions/starter/starter.py index f87ebcc5f..c8c7101a6 100644 --- a/llama_stack/distributions/starter/starter.py +++ b/llama_stack/distributions/starter/starter.py @@ -11,8 +11,10 @@ from llama_stack.core.datatypes import ( BuildProvider, Provider, ProviderSpec, + QualifiedModel, ShieldInput, ToolGroupInput, + VectorStoresConfig, ) from llama_stack.core.utils.dynamic import instantiate_class_type from llama_stack.distributions.template import DistributionTemplate, RunConfigSettings @@ -31,6 +33,8 @@ from llama_stack.providers.remote.vector_io.chroma.config import ChromaVectorIOC from llama_stack.providers.remote.vector_io.pgvector.config import ( PGVectorVectorIOConfig, ) +from llama_stack.providers.remote.vector_io.qdrant.config import QdrantVectorIOConfig +from llama_stack.providers.remote.vector_io.weaviate.config import WeaviateVectorIOConfig from llama_stack.providers.utils.sqlstore.sqlstore import PostgresSqlStoreConfig @@ -113,6 +117,8 @@ def get_distribution_template(name: str = "starter") -> DistributionTemplate: BuildProvider(provider_type="inline::milvus"), BuildProvider(provider_type="remote::chromadb"), BuildProvider(provider_type="remote::pgvector"), + BuildProvider(provider_type="remote::qdrant"), + BuildProvider(provider_type="remote::weaviate"), ], "files": [BuildProvider(provider_type="inline::localfs")], "safety": [ @@ -221,12 +227,35 @@ def get_distribution_template(name: str = "starter") -> DistributionTemplate: password="${env.PGVECTOR_PASSWORD:=}", ), ), + Provider( + provider_id="${env.QDRANT_URL:+qdrant}", + provider_type="remote::qdrant", + config=QdrantVectorIOConfig.sample_run_config( + f"~/.llama/distributions/{name}", + url="${env.QDRANT_URL:=}", + ), + ), + Provider( + provider_id="${env.WEAVIATE_CLUSTER_URL:+weaviate}", + provider_type="remote::weaviate", + config=WeaviateVectorIOConfig.sample_run_config( + f"~/.llama/distributions/{name}", + cluster_url="${env.WEAVIATE_CLUSTER_URL:=}", + ), + ), ], "files": [files_provider], }, default_models=[], default_tool_groups=default_tool_groups, default_shields=default_shields, + vector_stores_config=VectorStoresConfig( + default_provider_id="faiss", + default_embedding_model=QualifiedModel( + provider_id="sentence-transformers", + model_id="nomic-ai/nomic-embed-text-v1.5", + ), + ), ), }, run_config_env_vars={ diff --git a/llama_stack/distributions/template.py b/llama_stack/distributions/template.py index 807829999..daa609388 100644 --- a/llama_stack/distributions/template.py +++ b/llama_stack/distributions/template.py @@ -27,8 +27,15 @@ from llama_stack.core.datatypes import ( ShieldInput, TelemetryConfig, ToolGroupInput, + VectorStoresConfig, ) from llama_stack.core.distribution import get_provider_registry +from llama_stack.core.storage.datatypes import ( + InferenceStoreReference, + KVStoreReference, + SqlStoreReference, + StorageBackendType, +) from llama_stack.core.utils.dynamic import instantiate_class_type from llama_stack.core.utils.image_types import LlamaStackImageType from llama_stack.providers.utils.inference.model_registry import ProviderModelEntry @@ -180,10 +187,10 @@ class RunConfigSettings(BaseModel): default_tool_groups: list[ToolGroupInput] | None = None default_datasets: list[DatasetInput] | None = None default_benchmarks: list[BenchmarkInput] | None = None - metadata_store: dict | None = None - inference_store: dict | None = None - conversations_store: dict | None = None + vector_stores_config: VectorStoresConfig | None = None telemetry: TelemetryConfig = Field(default_factory=lambda: TelemetryConfig(enabled=True)) + storage_backends: dict[str, Any] | None = None + storage_stores: dict[str, Any] | None = None def run_config( self, @@ -226,28 +233,45 @@ class RunConfigSettings(BaseModel): # Get unique set of APIs from providers apis = sorted(providers.keys()) + storage_backends = self.storage_backends or { + "kv_default": SqliteKVStoreConfig.sample_run_config( + __distro_dir__=f"~/.llama/distributions/{name}", + db_name="kvstore.db", + ), + "sql_default": SqliteSqlStoreConfig.sample_run_config( + __distro_dir__=f"~/.llama/distributions/{name}", + db_name="sql_store.db", + ), + } + + storage_stores = self.storage_stores or { + "metadata": KVStoreReference( + backend="kv_default", + namespace="registry", + ).model_dump(exclude_none=True), + "inference": InferenceStoreReference( + backend="sql_default", + table_name="inference_store", + ).model_dump(exclude_none=True), + "conversations": SqlStoreReference( + backend="sql_default", + table_name="openai_conversations", + ).model_dump(exclude_none=True), + } + + storage_config = dict( + backends=storage_backends, + stores=storage_stores, + ) + # Return a dict that matches StackRunConfig structure - return { + config = { "version": LLAMA_STACK_RUN_CONFIG_VERSION, "image_name": name, "container_image": container_image, "apis": apis, "providers": provider_configs, - "metadata_store": self.metadata_store - or SqliteKVStoreConfig.sample_run_config( - __distro_dir__=f"~/.llama/distributions/{name}", - db_name="registry.db", - ), - "inference_store": self.inference_store - or SqliteSqlStoreConfig.sample_run_config( - __distro_dir__=f"~/.llama/distributions/{name}", - db_name="inference_store.db", - ), - "conversations_store": self.conversations_store - or SqliteSqlStoreConfig.sample_run_config( - __distro_dir__=f"~/.llama/distributions/{name}", - db_name="conversations.db", - ), + "storage": storage_config, "models": [m.model_dump(exclude_none=True) for m in (self.default_models or [])], "shields": [s.model_dump(exclude_none=True) for s in (self.default_shields or [])], "vector_dbs": [], @@ -261,6 +285,11 @@ class RunConfigSettings(BaseModel): "telemetry": self.telemetry.model_dump(exclude_none=True) if self.telemetry else None, } + if self.vector_stores_config: + config["vector_stores"] = self.vector_stores_config.model_dump(exclude_none=True) + + return config + class DistributionTemplate(BaseModel): """ @@ -297,11 +326,15 @@ class DistributionTemplate(BaseModel): # We should have a better way to do this by formalizing the concept of "internal" APIs # and providers, with a way to specify dependencies for them. - if run_config_.get("inference_store"): - additional_pip_packages.extend(get_sql_pip_packages(run_config_["inference_store"])) - - if run_config_.get("metadata_store"): - additional_pip_packages.extend(get_kv_pip_packages(run_config_["metadata_store"])) + storage_cfg = run_config_.get("storage", {}) + for backend_cfg in storage_cfg.get("backends", {}).values(): + store_type = backend_cfg.get("type") + if not store_type: + continue + if str(store_type).startswith("kv_"): + additional_pip_packages.extend(get_kv_pip_packages(backend_cfg)) + elif str(store_type).startswith("sql_"): + additional_pip_packages.extend(get_sql_pip_packages(backend_cfg)) if self.additional_pip_packages: additional_pip_packages.extend(self.additional_pip_packages) @@ -387,11 +420,13 @@ class DistributionTemplate(BaseModel): def enum_representer(dumper, data): return dumper.represent_scalar("tag:yaml.org,2002:str", data.value) - # Register YAML representer for ModelType + # Register YAML representer for enums yaml.add_representer(ModelType, enum_representer) yaml.add_representer(DatasetPurpose, enum_representer) + yaml.add_representer(StorageBackendType, enum_representer) yaml.SafeDumper.add_representer(ModelType, enum_representer) yaml.SafeDumper.add_representer(DatasetPurpose, enum_representer) + yaml.SafeDumper.add_representer(StorageBackendType, enum_representer) for output_dir in [yaml_output_dir, doc_output_dir]: output_dir.mkdir(parents=True, exist_ok=True) diff --git a/llama_stack/distributions/watsonx/run.yaml b/llama_stack/distributions/watsonx/run.yaml index c3db4eeb8..37866cb32 100644 --- a/llama_stack/distributions/watsonx/run.yaml +++ b/llama_stack/distributions/watsonx/run.yaml @@ -22,9 +22,9 @@ providers: - provider_id: faiss provider_type: inline::faiss config: - kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/faiss_store.db + persistence: + namespace: vector_io::faiss + backend: kv_default safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -34,32 +34,35 @@ providers: - provider_id: meta-reference provider_type: inline::meta-reference config: - persistence_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/agents_store.db - responses_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/responses_store.db + persistence: + agent_state: + namespace: agents + backend: kv_default + responses: + table_name: responses + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 eval: - provider_id: meta-reference provider_type: inline::meta-reference config: kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/meta_reference_eval.db + namespace: eval + backend: kv_default datasetio: - provider_id: huggingface provider_type: remote::huggingface config: kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/huggingface_datasetio.db + namespace: datasetio::huggingface + backend: kv_default - provider_id: localfs provider_type: inline::localfs config: kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/localfs_datasetio.db + namespace: datasetio::localfs + backend: kv_default scoring: - provider_id: basic provider_type: inline::basic @@ -90,17 +93,28 @@ providers: config: storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/watsonx/files} metadata_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/files_metadata.db -metadata_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/registry.db -inference_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/inference_store.db -conversations_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/conversations.db + table_name: files_metadata + backend: sql_default +storage: + backends: + kv_default: + type: kv_sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/kvstore.db + sql_default: + type: sql_sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/sql_store.db + stores: + metadata: + namespace: registry + backend: kv_default + inference: + table_name: inference_store + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 + conversations: + table_name: openai_conversations + backend: sql_default models: [] shields: [] vector_dbs: [] diff --git a/llama_stack/providers/inline/agents/meta_reference/agents.py b/llama_stack/providers/inline/agents/meta_reference/agents.py index 810c063e6..c2f6ea640 100644 --- a/llama_stack/providers/inline/agents/meta_reference/agents.py +++ b/llama_stack/providers/inline/agents/meta_reference/agents.py @@ -83,8 +83,8 @@ class MetaReferenceAgentsImpl(Agents): self.policy = policy async def initialize(self) -> None: - self.persistence_store = await kvstore_impl(self.config.persistence_store) - self.responses_store = ResponsesStore(self.config.responses_store, self.policy) + self.persistence_store = await kvstore_impl(self.config.persistence.agent_state) + self.responses_store = ResponsesStore(self.config.persistence.responses, self.policy) await self.responses_store.initialize() self.openai_responses_impl = OpenAIResponsesImpl( inference_api=self.inference_api, diff --git a/llama_stack/providers/inline/agents/meta_reference/config.py b/llama_stack/providers/inline/agents/meta_reference/config.py index 1c392f29c..a800b426b 100644 --- a/llama_stack/providers/inline/agents/meta_reference/config.py +++ b/llama_stack/providers/inline/agents/meta_reference/config.py @@ -8,24 +8,30 @@ from typing import Any from pydantic import BaseModel -from llama_stack.providers.utils.kvstore import KVStoreConfig -from llama_stack.providers.utils.kvstore.config import SqliteKVStoreConfig -from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig, SqlStoreConfig +from llama_stack.core.storage.datatypes import KVStoreReference, ResponsesStoreReference + + +class AgentPersistenceConfig(BaseModel): + """Nested persistence configuration for agents.""" + + agent_state: KVStoreReference + responses: ResponsesStoreReference class MetaReferenceAgentsImplConfig(BaseModel): - persistence_store: KVStoreConfig - responses_store: SqlStoreConfig + persistence: AgentPersistenceConfig @classmethod def sample_run_config(cls, __distro_dir__: str) -> dict[str, Any]: return { - "persistence_store": SqliteKVStoreConfig.sample_run_config( - __distro_dir__=__distro_dir__, - db_name="agents_store.db", - ), - "responses_store": SqliteSqlStoreConfig.sample_run_config( - __distro_dir__=__distro_dir__, - db_name="responses_store.db", - ), + "persistence": { + "agent_state": KVStoreReference( + backend="kv_default", + namespace="agents", + ).model_dump(exclude_none=True), + "responses": ResponsesStoreReference( + backend="sql_default", + table_name="responses", + ).model_dump(exclude_none=True), + } } diff --git a/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py b/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py index 851e6ef28..2360dafd9 100644 --- a/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +++ b/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py @@ -359,6 +359,7 @@ class OpenAIResponsesImpl: tool_executor=self.tool_executor, safety_api=self.safety_api, guardrail_ids=guardrail_ids, + instructions=instructions, ) # Stream the response diff --git a/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py b/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py index caf899cdd..e80ffcdd1 100644 --- a/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +++ b/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py @@ -110,6 +110,7 @@ class StreamingResponseOrchestrator: text: OpenAIResponseText, max_infer_iters: int, tool_executor, # Will be the tool execution logic from the main class + instructions: str, safety_api, guardrail_ids: list[str] | None = None, ): @@ -133,6 +134,8 @@ class StreamingResponseOrchestrator: self.accumulated_usage: OpenAIResponseUsage | None = None # Track if we've sent a refusal response self.violation_detected = False + # system message that is inserted into the model's context + self.instructions = instructions async def _create_refusal_response(self, violation_message: str) -> OpenAIResponseObjectStream: """Create a refusal response to replace streaming content.""" @@ -176,6 +179,7 @@ class StreamingResponseOrchestrator: tools=self.ctx.available_tools(), error=error, usage=self.accumulated_usage, + instructions=self.instructions, ) async def create_response(self) -> AsyncIterator[OpenAIResponseObjectStream]: diff --git a/llama_stack/providers/inline/batches/reference/config.py b/llama_stack/providers/inline/batches/reference/config.py index d8d06868b..f896a897d 100644 --- a/llama_stack/providers/inline/batches/reference/config.py +++ b/llama_stack/providers/inline/batches/reference/config.py @@ -6,13 +6,13 @@ from pydantic import BaseModel, Field -from llama_stack.providers.utils.kvstore.config import KVStoreConfig, SqliteKVStoreConfig +from llama_stack.core.storage.datatypes import KVStoreReference class ReferenceBatchesImplConfig(BaseModel): """Configuration for the Reference Batches implementation.""" - kvstore: KVStoreConfig = Field( + kvstore: KVStoreReference = Field( description="Configuration for the key-value store backend.", ) @@ -33,8 +33,8 @@ class ReferenceBatchesImplConfig(BaseModel): @classmethod def sample_run_config(cls, __distro_dir__: str) -> dict: return { - "kvstore": SqliteKVStoreConfig.sample_run_config( - __distro_dir__=__distro_dir__, - db_name="batches.db", - ), + "kvstore": KVStoreReference( + backend="kv_default", + namespace="batches", + ).model_dump(exclude_none=True), } diff --git a/llama_stack/providers/inline/datasetio/localfs/config.py b/llama_stack/providers/inline/datasetio/localfs/config.py index b450e8777..6e878df62 100644 --- a/llama_stack/providers/inline/datasetio/localfs/config.py +++ b/llama_stack/providers/inline/datasetio/localfs/config.py @@ -7,20 +7,17 @@ from typing import Any from pydantic import BaseModel -from llama_stack.providers.utils.kvstore.config import ( - KVStoreConfig, - SqliteKVStoreConfig, -) +from llama_stack.core.storage.datatypes import KVStoreReference class LocalFSDatasetIOConfig(BaseModel): - kvstore: KVStoreConfig + kvstore: KVStoreReference @classmethod def sample_run_config(cls, __distro_dir__: str, **kwargs: Any) -> dict[str, Any]: return { - "kvstore": SqliteKVStoreConfig.sample_run_config( - __distro_dir__=__distro_dir__, - db_name="localfs_datasetio.db", - ) + "kvstore": KVStoreReference( + backend="kv_default", + namespace="datasetio::localfs", + ).model_dump(exclude_none=True) } diff --git a/llama_stack/providers/inline/eval/meta_reference/config.py b/llama_stack/providers/inline/eval/meta_reference/config.py index 2a4a29998..b496c855e 100644 --- a/llama_stack/providers/inline/eval/meta_reference/config.py +++ b/llama_stack/providers/inline/eval/meta_reference/config.py @@ -7,20 +7,17 @@ from typing import Any from pydantic import BaseModel -from llama_stack.providers.utils.kvstore.config import ( - KVStoreConfig, - SqliteKVStoreConfig, -) +from llama_stack.core.storage.datatypes import KVStoreReference class MetaReferenceEvalConfig(BaseModel): - kvstore: KVStoreConfig + kvstore: KVStoreReference @classmethod def sample_run_config(cls, __distro_dir__: str, **kwargs: Any) -> dict[str, Any]: return { - "kvstore": SqliteKVStoreConfig.sample_run_config( - __distro_dir__=__distro_dir__, - db_name="meta_reference_eval.db", - ) + "kvstore": KVStoreReference( + backend="kv_default", + namespace="eval", + ).model_dump(exclude_none=True) } diff --git a/llama_stack/providers/inline/files/localfs/config.py b/llama_stack/providers/inline/files/localfs/config.py index 6c767af8f..0c2dd3b21 100644 --- a/llama_stack/providers/inline/files/localfs/config.py +++ b/llama_stack/providers/inline/files/localfs/config.py @@ -8,14 +8,14 @@ from typing import Any from pydantic import BaseModel, Field -from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig, SqlStoreConfig +from llama_stack.core.storage.datatypes import SqlStoreReference class LocalfsFilesImplConfig(BaseModel): storage_dir: str = Field( description="Directory to store uploaded files", ) - metadata_store: SqlStoreConfig = Field( + metadata_store: SqlStoreReference = Field( description="SQL store configuration for file metadata", ) ttl_secs: int = 365 * 24 * 60 * 60 # 1 year @@ -24,8 +24,8 @@ class LocalfsFilesImplConfig(BaseModel): def sample_run_config(cls, __distro_dir__: str) -> dict[str, Any]: return { "storage_dir": "${env.FILES_STORAGE_DIR:=" + __distro_dir__ + "/files}", - "metadata_store": SqliteSqlStoreConfig.sample_run_config( - __distro_dir__=__distro_dir__, - db_name="files_metadata.db", - ), + "metadata_store": SqlStoreReference( + backend="sql_default", + table_name="files_metadata", + ).model_dump(exclude_none=True), } diff --git a/llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py b/llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py index 871adcb24..cb72aa13a 100644 --- a/llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py +++ b/llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py @@ -59,7 +59,6 @@ class SentenceTransformersInferenceImpl( provider_id=self.__provider_id__, metadata={ "embedding_dimension": 768, - "default_configured": True, }, model_type=ModelType.embedding, ), diff --git a/llama_stack/providers/inline/vector_io/chroma/__init__.py b/llama_stack/providers/inline/vector_io/chroma/__init__.py index 09e869c90..575e5ad88 100644 --- a/llama_stack/providers/inline/vector_io/chroma/__init__.py +++ b/llama_stack/providers/inline/vector_io/chroma/__init__.py @@ -12,15 +12,8 @@ from .config import ChromaVectorIOConfig async def get_provider_impl(config: ChromaVectorIOConfig, deps: dict[Api, Any]): - from llama_stack.providers.remote.vector_io.chroma.chroma import ( - ChromaVectorIOAdapter, - ) + from llama_stack.providers.remote.vector_io.chroma.chroma import ChromaVectorIOAdapter - impl = ChromaVectorIOAdapter( - config, - deps[Api.inference], - deps[Api.models], - deps.get(Api.files), - ) + impl = ChromaVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files)) await impl.initialize() return impl diff --git a/llama_stack/providers/inline/vector_io/chroma/config.py b/llama_stack/providers/inline/vector_io/chroma/config.py index a9566f7ff..1798f10de 100644 --- a/llama_stack/providers/inline/vector_io/chroma/config.py +++ b/llama_stack/providers/inline/vector_io/chroma/config.py @@ -8,14 +8,14 @@ from typing import Any from pydantic import BaseModel, Field -from llama_stack.providers.utils.kvstore.config import KVStoreConfig, SqliteKVStoreConfig +from llama_stack.core.storage.datatypes import KVStoreReference from llama_stack.schema_utils import json_schema_type @json_schema_type class ChromaVectorIOConfig(BaseModel): db_path: str - kvstore: KVStoreConfig = Field(description="Config for KV store backend") + persistence: KVStoreReference = Field(description="Config for KV store backend") @classmethod def sample_run_config( @@ -23,8 +23,8 @@ class ChromaVectorIOConfig(BaseModel): ) -> dict[str, Any]: return { "db_path": db_path, - "kvstore": SqliteKVStoreConfig.sample_run_config( - __distro_dir__=__distro_dir__, - db_name="chroma_inline_registry.db", - ), + "persistence": KVStoreReference( + backend="kv_default", + namespace="vector_io::chroma", + ).model_dump(exclude_none=True), } diff --git a/llama_stack/providers/inline/vector_io/faiss/__init__.py b/llama_stack/providers/inline/vector_io/faiss/__init__.py index c0f01bc9d..24d1f292a 100644 --- a/llama_stack/providers/inline/vector_io/faiss/__init__.py +++ b/llama_stack/providers/inline/vector_io/faiss/__init__.py @@ -16,11 +16,6 @@ async def get_provider_impl(config: FaissVectorIOConfig, deps: dict[Api, Any]): assert isinstance(config, FaissVectorIOConfig), f"Unexpected config type: {type(config)}" - impl = FaissVectorIOAdapter( - config, - deps[Api.inference], - deps[Api.models], - deps.get(Api.files), - ) + impl = FaissVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files)) await impl.initialize() return impl diff --git a/llama_stack/providers/inline/vector_io/faiss/config.py b/llama_stack/providers/inline/vector_io/faiss/config.py index cbcbb1762..dd7a7aeca 100644 --- a/llama_stack/providers/inline/vector_io/faiss/config.py +++ b/llama_stack/providers/inline/vector_io/faiss/config.py @@ -8,22 +8,19 @@ from typing import Any from pydantic import BaseModel -from llama_stack.providers.utils.kvstore.config import ( - KVStoreConfig, - SqliteKVStoreConfig, -) +from llama_stack.core.storage.datatypes import KVStoreReference from llama_stack.schema_utils import json_schema_type @json_schema_type class FaissVectorIOConfig(BaseModel): - kvstore: KVStoreConfig + persistence: KVStoreReference @classmethod def sample_run_config(cls, __distro_dir__: str, **kwargs: Any) -> dict[str, Any]: return { - "kvstore": SqliteKVStoreConfig.sample_run_config( - __distro_dir__=__distro_dir__, - db_name="faiss_store.db", - ) + "persistence": KVStoreReference( + backend="kv_default", + namespace="vector_io::faiss", + ).model_dump(exclude_none=True) } diff --git a/llama_stack/providers/inline/vector_io/faiss/faiss.py b/llama_stack/providers/inline/vector_io/faiss/faiss.py index df0864db8..f13eb3e96 100644 --- a/llama_stack/providers/inline/vector_io/faiss/faiss.py +++ b/llama_stack/providers/inline/vector_io/faiss/faiss.py @@ -17,27 +17,14 @@ from numpy.typing import NDArray from llama_stack.apis.common.errors import VectorStoreNotFoundError from llama_stack.apis.files import Files from llama_stack.apis.inference import Inference, InterleavedContent -from llama_stack.apis.models import Models from llama_stack.apis.vector_dbs import VectorDB -from llama_stack.apis.vector_io import ( - Chunk, - QueryChunksResponse, - VectorIO, -) +from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO from llama_stack.log import get_logger -from llama_stack.providers.datatypes import ( - HealthResponse, - HealthStatus, - VectorDBsProtocolPrivate, -) +from llama_stack.providers.datatypes import HealthResponse, HealthStatus, VectorDBsProtocolPrivate from llama_stack.providers.utils.kvstore import kvstore_impl from llama_stack.providers.utils.kvstore.api import KVStore from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin -from llama_stack.providers.utils.memory.vector_store import ( - ChunkForDeletion, - EmbeddingIndex, - VectorDBWithIndex, -) +from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorDBWithIndex from .config import FaissVectorIOConfig @@ -155,12 +142,7 @@ class FaissIndex(EmbeddingIndex): await self._save_index() - async def query_vector( - self, - embedding: NDArray, - k: int, - score_threshold: float, - ) -> QueryChunksResponse: + async def query_vector(self, embedding: NDArray, k: int, score_threshold: float) -> QueryChunksResponse: distances, indices = await asyncio.to_thread(self.index.search, embedding.reshape(1, -1).astype(np.float32), k) chunks = [] scores = [] @@ -175,12 +157,7 @@ class FaissIndex(EmbeddingIndex): return QueryChunksResponse(chunks=chunks, scores=scores) - async def query_keyword( - self, - query_string: str, - k: int, - score_threshold: float, - ) -> QueryChunksResponse: + async def query_keyword(self, query_string: str, k: int, score_threshold: float) -> QueryChunksResponse: raise NotImplementedError( "Keyword search is not supported - underlying DB FAISS does not support this search mode" ) @@ -200,21 +177,14 @@ class FaissIndex(EmbeddingIndex): class FaissVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPrivate): - def __init__( - self, - config: FaissVectorIOConfig, - inference_api: Inference, - models_api: Models, - files_api: Files | None, - ) -> None: + def __init__(self, config: FaissVectorIOConfig, inference_api: Inference, files_api: Files | None) -> None: super().__init__(files_api=files_api, kvstore=None) self.config = config self.inference_api = inference_api - self.models_api = models_api self.cache: dict[str, VectorDBWithIndex] = {} async def initialize(self) -> None: - self.kvstore = await kvstore_impl(self.config.kvstore) + self.kvstore = await kvstore_impl(self.config.persistence) # Load existing banks from kvstore start_key = VECTOR_DBS_PREFIX end_key = f"{VECTOR_DBS_PREFIX}\xff" @@ -252,17 +222,11 @@ class FaissVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPr except Exception as e: return HealthResponse(status=HealthStatus.ERROR, message=f"Health check failed: {str(e)}") - async def register_vector_db( - self, - vector_db: VectorDB, - ) -> None: + async def register_vector_db(self, vector_db: VectorDB) -> None: assert self.kvstore is not None key = f"{VECTOR_DBS_PREFIX}{vector_db.identifier}" - await self.kvstore.set( - key=key, - value=vector_db.model_dump_json(), - ) + await self.kvstore.set(key=key, value=vector_db.model_dump_json()) # Store in cache self.cache[vector_db.identifier] = VectorDBWithIndex( @@ -285,12 +249,7 @@ class FaissVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPr del self.cache[vector_db_id] await self.kvstore.delete(f"{VECTOR_DBS_PREFIX}{vector_db_id}") - async def insert_chunks( - self, - vector_db_id: str, - chunks: list[Chunk], - ttl_seconds: int | None = None, - ) -> None: + async def insert_chunks(self, vector_db_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None: index = self.cache.get(vector_db_id) if index is None: raise ValueError(f"Vector DB {vector_db_id} not found. found: {self.cache.keys()}") @@ -298,10 +257,7 @@ class FaissVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPr await index.insert_chunks(chunks) async def query_chunks( - self, - vector_db_id: str, - query: InterleavedContent, - params: dict[str, Any] | None = None, + self, vector_db_id: str, query: InterleavedContent, params: dict[str, Any] | None = None ) -> QueryChunksResponse: index = self.cache.get(vector_db_id) if index is None: diff --git a/llama_stack/providers/inline/vector_io/milvus/__init__.py b/llama_stack/providers/inline/vector_io/milvus/__init__.py index 46a006a91..7dc9c6a33 100644 --- a/llama_stack/providers/inline/vector_io/milvus/__init__.py +++ b/llama_stack/providers/inline/vector_io/milvus/__init__.py @@ -14,11 +14,6 @@ from .config import MilvusVectorIOConfig async def get_provider_impl(config: MilvusVectorIOConfig, deps: dict[Api, Any]): from llama_stack.providers.remote.vector_io.milvus.milvus import MilvusVectorIOAdapter - impl = MilvusVectorIOAdapter( - config, - deps[Api.inference], - deps[Api.models], - deps.get(Api.files), - ) + impl = MilvusVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files)) await impl.initialize() return impl diff --git a/llama_stack/providers/inline/vector_io/milvus/config.py b/llama_stack/providers/inline/vector_io/milvus/config.py index 8cbd056be..b333b04ea 100644 --- a/llama_stack/providers/inline/vector_io/milvus/config.py +++ b/llama_stack/providers/inline/vector_io/milvus/config.py @@ -8,25 +8,22 @@ from typing import Any from pydantic import BaseModel, Field -from llama_stack.providers.utils.kvstore.config import ( - KVStoreConfig, - SqliteKVStoreConfig, -) +from llama_stack.core.storage.datatypes import KVStoreReference from llama_stack.schema_utils import json_schema_type @json_schema_type class MilvusVectorIOConfig(BaseModel): db_path: str - kvstore: KVStoreConfig = Field(description="Config for KV store backend (SQLite only for now)") + persistence: KVStoreReference = Field(description="Config for KV store backend (SQLite only for now)") consistency_level: str = Field(description="The consistency level of the Milvus server", default="Strong") @classmethod def sample_run_config(cls, __distro_dir__: str, **kwargs: Any) -> dict[str, Any]: return { "db_path": "${env.MILVUS_DB_PATH:=" + __distro_dir__ + "}/" + "milvus.db", - "kvstore": SqliteKVStoreConfig.sample_run_config( - __distro_dir__=__distro_dir__, - db_name="milvus_registry.db", - ), + "persistence": KVStoreReference( + backend="kv_default", + namespace="vector_io::milvus", + ).model_dump(exclude_none=True), } diff --git a/llama_stack/providers/inline/vector_io/qdrant/__init__.py b/llama_stack/providers/inline/vector_io/qdrant/__init__.py index 2863f667c..bef6d50e6 100644 --- a/llama_stack/providers/inline/vector_io/qdrant/__init__.py +++ b/llama_stack/providers/inline/vector_io/qdrant/__init__.py @@ -15,11 +15,6 @@ async def get_provider_impl(config: QdrantVectorIOConfig, deps: dict[Api, Any]): from llama_stack.providers.remote.vector_io.qdrant.qdrant import QdrantVectorIOAdapter assert isinstance(config, QdrantVectorIOConfig), f"Unexpected config type: {type(config)}" - impl = QdrantVectorIOAdapter( - config, - deps[Api.inference], - deps[Api.models], - deps.get(Api.files), - ) + impl = QdrantVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files)) await impl.initialize() return impl diff --git a/llama_stack/providers/inline/vector_io/qdrant/config.py b/llama_stack/providers/inline/vector_io/qdrant/config.py index e15c27ea1..e7ecde7b7 100644 --- a/llama_stack/providers/inline/vector_io/qdrant/config.py +++ b/llama_stack/providers/inline/vector_io/qdrant/config.py @@ -9,23 +9,21 @@ from typing import Any from pydantic import BaseModel -from llama_stack.providers.utils.kvstore.config import ( - KVStoreConfig, - SqliteKVStoreConfig, -) +from llama_stack.core.storage.datatypes import KVStoreReference from llama_stack.schema_utils import json_schema_type @json_schema_type class QdrantVectorIOConfig(BaseModel): path: str - kvstore: KVStoreConfig + persistence: KVStoreReference @classmethod def sample_run_config(cls, __distro_dir__: str) -> dict[str, Any]: return { "path": "${env.QDRANT_PATH:=~/.llama/" + __distro_dir__ + "}/" + "qdrant.db", - "kvstore": SqliteKVStoreConfig.sample_run_config( - __distro_dir__=__distro_dir__, db_name="qdrant_registry.db" - ), + "persistence": KVStoreReference( + backend="kv_default", + namespace="vector_io::qdrant", + ).model_dump(exclude_none=True), } diff --git a/llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py b/llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py index 93921fb23..df96e927c 100644 --- a/llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py +++ b/llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py @@ -15,11 +15,6 @@ async def get_provider_impl(config: SQLiteVectorIOConfig, deps: dict[Api, Any]): from .sqlite_vec import SQLiteVecVectorIOAdapter assert isinstance(config, SQLiteVectorIOConfig), f"Unexpected config type: {type(config)}" - impl = SQLiteVecVectorIOAdapter( - config, - deps[Api.inference], - deps[Api.models], - deps.get(Api.files), - ) + impl = SQLiteVecVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files)) await impl.initialize() return impl diff --git a/llama_stack/providers/inline/vector_io/sqlite_vec/config.py b/llama_stack/providers/inline/vector_io/sqlite_vec/config.py index 525ed4b1f..596f8fc95 100644 --- a/llama_stack/providers/inline/vector_io/sqlite_vec/config.py +++ b/llama_stack/providers/inline/vector_io/sqlite_vec/config.py @@ -8,22 +8,19 @@ from typing import Any from pydantic import BaseModel, Field -from llama_stack.providers.utils.kvstore.config import ( - KVStoreConfig, - SqliteKVStoreConfig, -) +from llama_stack.core.storage.datatypes import KVStoreReference class SQLiteVectorIOConfig(BaseModel): db_path: str = Field(description="Path to the SQLite database file") - kvstore: KVStoreConfig = Field(description="Config for KV store backend (SQLite only for now)") + persistence: KVStoreReference = Field(description="Config for KV store backend (SQLite only for now)") @classmethod def sample_run_config(cls, __distro_dir__: str) -> dict[str, Any]: return { "db_path": "${env.SQLITE_STORE_DIR:=" + __distro_dir__ + "}/" + "sqlite_vec.db", - "kvstore": SqliteKVStoreConfig.sample_run_config( - __distro_dir__=__distro_dir__, - db_name="sqlite_vec_registry.db", - ), + "persistence": KVStoreReference( + backend="kv_default", + namespace="vector_io::sqlite_vec", + ).model_dump(exclude_none=True), } diff --git a/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py b/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py index 8bc3b04cb..cfe23bde5 100644 --- a/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +++ b/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py @@ -17,13 +17,8 @@ from numpy.typing import NDArray from llama_stack.apis.common.errors import VectorStoreNotFoundError from llama_stack.apis.files import Files from llama_stack.apis.inference import Inference -from llama_stack.apis.models import Models from llama_stack.apis.vector_dbs import VectorDB -from llama_stack.apis.vector_io import ( - Chunk, - QueryChunksResponse, - VectorIO, -) +from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO from llama_stack.log import get_logger from llama_stack.providers.datatypes import VectorDBsProtocolPrivate from llama_stack.providers.utils.kvstore import kvstore_impl @@ -175,32 +170,18 @@ class SQLiteVecIndex(EmbeddingIndex): # Insert vector embeddings embedding_data = [ - ( - ( - chunk.chunk_id, - serialize_vector(emb.tolist()), - ) - ) + ((chunk.chunk_id, serialize_vector(emb.tolist()))) for chunk, emb in zip(batch_chunks, batch_embeddings, strict=True) ] - cur.executemany( - f"INSERT INTO [{self.vector_table}] (id, embedding) VALUES (?, ?);", - embedding_data, - ) + cur.executemany(f"INSERT INTO [{self.vector_table}] (id, embedding) VALUES (?, ?);", embedding_data) # Insert FTS content fts_data = [(chunk.chunk_id, chunk.content) for chunk in batch_chunks] # DELETE existing entries with same IDs (FTS5 doesn't support ON CONFLICT) - cur.executemany( - f"DELETE FROM [{self.fts_table}] WHERE id = ?;", - [(row[0],) for row in fts_data], - ) + cur.executemany(f"DELETE FROM [{self.fts_table}] WHERE id = ?;", [(row[0],) for row in fts_data]) # INSERT new entries - cur.executemany( - f"INSERT INTO [{self.fts_table}] (id, content) VALUES (?, ?);", - fts_data, - ) + cur.executemany(f"INSERT INTO [{self.fts_table}] (id, content) VALUES (?, ?);", fts_data) connection.commit() @@ -216,12 +197,7 @@ class SQLiteVecIndex(EmbeddingIndex): # Run batch insertion in a background thread await asyncio.to_thread(_execute_all_batch_inserts) - async def query_vector( - self, - embedding: NDArray, - k: int, - score_threshold: float, - ) -> QueryChunksResponse: + async def query_vector(self, embedding: NDArray, k: int, score_threshold: float) -> QueryChunksResponse: """ Performs vector-based search using a virtual table for vector similarity. """ @@ -261,12 +237,7 @@ class SQLiteVecIndex(EmbeddingIndex): scores.append(score) return QueryChunksResponse(chunks=chunks, scores=scores) - async def query_keyword( - self, - query_string: str, - k: int, - score_threshold: float, - ) -> QueryChunksResponse: + async def query_keyword(self, query_string: str, k: int, score_threshold: float) -> QueryChunksResponse: """ Performs keyword-based search using SQLite FTS5 for relevance-ranked full-text search. """ @@ -410,22 +381,15 @@ class SQLiteVecVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtoc and creates a cache of VectorDBWithIndex instances (each wrapping a SQLiteVecIndex). """ - def __init__( - self, - config, - inference_api: Inference, - models_api: Models, - files_api: Files | None, - ) -> None: + def __init__(self, config, inference_api: Inference, files_api: Files | None) -> None: super().__init__(files_api=files_api, kvstore=None) self.config = config self.inference_api = inference_api - self.models_api = models_api self.cache: dict[str, VectorDBWithIndex] = {} self.vector_db_store = None async def initialize(self) -> None: - self.kvstore = await kvstore_impl(self.config.kvstore) + self.kvstore = await kvstore_impl(self.config.persistence) start_key = VECTOR_DBS_PREFIX end_key = f"{VECTOR_DBS_PREFIX}\xff" @@ -433,9 +397,7 @@ class SQLiteVecVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtoc for db_json in stored_vector_dbs: vector_db = VectorDB.model_validate_json(db_json) index = await SQLiteVecIndex.create( - vector_db.embedding_dimension, - self.config.db_path, - vector_db.identifier, + vector_db.embedding_dimension, self.config.db_path, vector_db.identifier ) self.cache[vector_db.identifier] = VectorDBWithIndex(vector_db, index, self.inference_api) @@ -450,11 +412,7 @@ class SQLiteVecVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtoc return [v.vector_db for v in self.cache.values()] async def register_vector_db(self, vector_db: VectorDB) -> None: - index = await SQLiteVecIndex.create( - vector_db.embedding_dimension, - self.config.db_path, - vector_db.identifier, - ) + index = await SQLiteVecIndex.create(vector_db.embedding_dimension, self.config.db_path, vector_db.identifier) self.cache[vector_db.identifier] = VectorDBWithIndex(vector_db, index, self.inference_api) async def _get_and_cache_vector_db_index(self, vector_db_id: str) -> VectorDBWithIndex | None: diff --git a/llama_stack/providers/remote/datasetio/huggingface/config.py b/llama_stack/providers/remote/datasetio/huggingface/config.py index 38f933728..35297cb58 100644 --- a/llama_stack/providers/remote/datasetio/huggingface/config.py +++ b/llama_stack/providers/remote/datasetio/huggingface/config.py @@ -7,20 +7,17 @@ from typing import Any from pydantic import BaseModel -from llama_stack.providers.utils.kvstore.config import ( - KVStoreConfig, - SqliteKVStoreConfig, -) +from llama_stack.core.storage.datatypes import KVStoreReference class HuggingfaceDatasetIOConfig(BaseModel): - kvstore: KVStoreConfig + kvstore: KVStoreReference @classmethod def sample_run_config(cls, __distro_dir__: str, **kwargs: Any) -> dict[str, Any]: return { - "kvstore": SqliteKVStoreConfig.sample_run_config( - __distro_dir__=__distro_dir__, - db_name="huggingface_datasetio.db", - ) + "kvstore": KVStoreReference( + backend="kv_default", + namespace="datasetio::huggingface", + ).model_dump(exclude_none=True) } diff --git a/llama_stack/providers/remote/files/s3/config.py b/llama_stack/providers/remote/files/s3/config.py index da20d8668..cd4b1adda 100644 --- a/llama_stack/providers/remote/files/s3/config.py +++ b/llama_stack/providers/remote/files/s3/config.py @@ -8,7 +8,7 @@ from typing import Any from pydantic import BaseModel, Field -from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig, SqlStoreConfig +from llama_stack.core.storage.datatypes import SqlStoreReference class S3FilesImplConfig(BaseModel): @@ -24,7 +24,7 @@ class S3FilesImplConfig(BaseModel): auto_create_bucket: bool = Field( default=False, description="Automatically create the S3 bucket if it doesn't exist" ) - metadata_store: SqlStoreConfig = Field(description="SQL store configuration for file metadata") + metadata_store: SqlStoreReference = Field(description="SQL store configuration for file metadata") @classmethod def sample_run_config(cls, __distro_dir__: str) -> dict[str, Any]: @@ -35,8 +35,8 @@ class S3FilesImplConfig(BaseModel): "aws_secret_access_key": "${env.AWS_SECRET_ACCESS_KEY:=}", "endpoint_url": "${env.S3_ENDPOINT_URL:=}", "auto_create_bucket": "${env.S3_AUTO_CREATE_BUCKET:=false}", - "metadata_store": SqliteSqlStoreConfig.sample_run_config( - __distro_dir__=__distro_dir__, - db_name="s3_files_metadata.db", - ), + "metadata_store": SqlStoreReference( + backend="sql_default", + table_name="s3_files_metadata", + ).model_dump(exclude_none=True), } diff --git a/llama_stack/providers/remote/vector_io/chroma/__init__.py b/llama_stack/providers/remote/vector_io/chroma/__init__.py index a6db48c43..e4b77c68d 100644 --- a/llama_stack/providers/remote/vector_io/chroma/__init__.py +++ b/llama_stack/providers/remote/vector_io/chroma/__init__.py @@ -12,11 +12,6 @@ from .config import ChromaVectorIOConfig async def get_adapter_impl(config: ChromaVectorIOConfig, deps: dict[Api, ProviderSpec]): from .chroma import ChromaVectorIOAdapter - impl = ChromaVectorIOAdapter( - config, - deps[Api.inference], - deps[Api.models], - deps.get(Api.files), - ) + impl = ChromaVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files)) await impl.initialize() return impl diff --git a/llama_stack/providers/remote/vector_io/chroma/chroma.py b/llama_stack/providers/remote/vector_io/chroma/chroma.py index 5792a83c6..0aa728c32 100644 --- a/llama_stack/providers/remote/vector_io/chroma/chroma.py +++ b/llama_stack/providers/remote/vector_io/chroma/chroma.py @@ -12,24 +12,16 @@ import chromadb from numpy.typing import NDArray from llama_stack.apis.files import Files -from llama_stack.apis.inference import InterleavedContent +from llama_stack.apis.inference import Inference, InterleavedContent from llama_stack.apis.vector_dbs import VectorDB -from llama_stack.apis.vector_io import ( - Chunk, - QueryChunksResponse, - VectorIO, -) +from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO from llama_stack.log import get_logger -from llama_stack.providers.datatypes import Api, VectorDBsProtocolPrivate +from llama_stack.providers.datatypes import VectorDBsProtocolPrivate from llama_stack.providers.inline.vector_io.chroma import ChromaVectorIOConfig as InlineChromaVectorIOConfig from llama_stack.providers.utils.kvstore import kvstore_impl from llama_stack.providers.utils.kvstore.api import KVStore from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin -from llama_stack.providers.utils.memory.vector_store import ( - ChunkForDeletion, - EmbeddingIndex, - VectorDBWithIndex, -) +from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorDBWithIndex from .config import ChromaVectorIOConfig as RemoteChromaVectorIOConfig @@ -68,19 +60,13 @@ class ChromaIndex(EmbeddingIndex): ids = [f"{c.metadata.get('document_id', '')}:{c.chunk_id}" for c in chunks] await maybe_await( - self.collection.add( - documents=[chunk.model_dump_json() for chunk in chunks], - embeddings=embeddings, - ids=ids, - ) + self.collection.add(documents=[chunk.model_dump_json() for chunk in chunks], embeddings=embeddings, ids=ids) ) async def query_vector(self, embedding: NDArray, k: int, score_threshold: float) -> QueryChunksResponse: results = await maybe_await( self.collection.query( - query_embeddings=[embedding.tolist()], - n_results=k, - include=["documents", "distances"], + query_embeddings=[embedding.tolist()], n_results=k, include=["documents", "distances"] ) ) distances = results["distances"][0] @@ -108,12 +94,7 @@ class ChromaIndex(EmbeddingIndex): async def delete(self): await maybe_await(self.client.delete_collection(self.collection.name)) - async def query_keyword( - self, - query_string: str, - k: int, - score_threshold: float, - ) -> QueryChunksResponse: + async def query_keyword(self, query_string: str, k: int, score_threshold: float) -> QueryChunksResponse: raise NotImplementedError("Keyword search is not supported in Chroma") async def delete_chunks(self, chunks_for_deletion: list[ChunkForDeletion]) -> None: @@ -137,21 +118,19 @@ class ChromaVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP def __init__( self, config: RemoteChromaVectorIOConfig | InlineChromaVectorIOConfig, - inference_api: Api.inference, - models_apis: Api.models, + inference_api: Inference, files_api: Files | None, ) -> None: super().__init__(files_api=files_api, kvstore=None) log.info(f"Initializing ChromaVectorIOAdapter with url: {config}") self.config = config self.inference_api = inference_api - self.models_api = models_apis self.client = None self.cache = {} self.vector_db_store = None async def initialize(self) -> None: - self.kvstore = await kvstore_impl(self.config.kvstore) + self.kvstore = await kvstore_impl(self.config.persistence) self.vector_db_store = self.kvstore if isinstance(self.config, RemoteChromaVectorIOConfig): @@ -172,14 +151,10 @@ class ChromaVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP # Clean up mixin resources (file batch tasks) await super().shutdown() - async def register_vector_db( - self, - vector_db: VectorDB, - ) -> None: + async def register_vector_db(self, vector_db: VectorDB) -> None: collection = await maybe_await( self.client.get_or_create_collection( - name=vector_db.identifier, - metadata={"vector_db": vector_db.model_dump_json()}, + name=vector_db.identifier, metadata={"vector_db": vector_db.model_dump_json()} ) ) self.cache[vector_db.identifier] = VectorDBWithIndex( @@ -194,12 +169,7 @@ class ChromaVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP await self.cache[vector_db_id].index.delete() del self.cache[vector_db_id] - async def insert_chunks( - self, - vector_db_id: str, - chunks: list[Chunk], - ttl_seconds: int | None = None, - ) -> None: + async def insert_chunks(self, vector_db_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None: index = await self._get_and_cache_vector_db_index(vector_db_id) if index is None: raise ValueError(f"Vector DB {vector_db_id} not found in Chroma") @@ -207,10 +177,7 @@ class ChromaVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP await index.insert_chunks(chunks) async def query_chunks( - self, - vector_db_id: str, - query: InterleavedContent, - params: dict[str, Any] | None = None, + self, vector_db_id: str, query: InterleavedContent, params: dict[str, Any] | None = None ) -> QueryChunksResponse: index = await self._get_and_cache_vector_db_index(vector_db_id) diff --git a/llama_stack/providers/remote/vector_io/chroma/config.py b/llama_stack/providers/remote/vector_io/chroma/config.py index a1193905a..209ba90bb 100644 --- a/llama_stack/providers/remote/vector_io/chroma/config.py +++ b/llama_stack/providers/remote/vector_io/chroma/config.py @@ -8,21 +8,21 @@ from typing import Any from pydantic import BaseModel, Field -from llama_stack.providers.utils.kvstore.config import KVStoreConfig, SqliteKVStoreConfig +from llama_stack.core.storage.datatypes import KVStoreReference from llama_stack.schema_utils import json_schema_type @json_schema_type class ChromaVectorIOConfig(BaseModel): url: str | None - kvstore: KVStoreConfig = Field(description="Config for KV store backend") + persistence: KVStoreReference = Field(description="Config for KV store backend") @classmethod def sample_run_config(cls, __distro_dir__: str, url: str = "${env.CHROMADB_URL}", **kwargs: Any) -> dict[str, Any]: return { "url": url, - "kvstore": SqliteKVStoreConfig.sample_run_config( - __distro_dir__=__distro_dir__, - db_name="chroma_remote_registry.db", - ), + "persistence": KVStoreReference( + backend="kv_default", + namespace="vector_io::chroma_remote", + ).model_dump(exclude_none=True), } diff --git a/llama_stack/providers/remote/vector_io/milvus/__init__.py b/llama_stack/providers/remote/vector_io/milvus/__init__.py index dc5a642d6..526075bb2 100644 --- a/llama_stack/providers/remote/vector_io/milvus/__init__.py +++ b/llama_stack/providers/remote/vector_io/milvus/__init__.py @@ -13,12 +13,6 @@ async def get_adapter_impl(config: MilvusVectorIOConfig, deps: dict[Api, Provide from .milvus import MilvusVectorIOAdapter assert isinstance(config, MilvusVectorIOConfig), f"Unexpected config type: {type(config)}" - - impl = MilvusVectorIOAdapter( - config, - deps[Api.inference], - deps[Api.models], - deps.get(Api.files), - ) + impl = MilvusVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files)) await impl.initialize() return impl diff --git a/llama_stack/providers/remote/vector_io/milvus/config.py b/llama_stack/providers/remote/vector_io/milvus/config.py index 899d3678d..8ff9e1328 100644 --- a/llama_stack/providers/remote/vector_io/milvus/config.py +++ b/llama_stack/providers/remote/vector_io/milvus/config.py @@ -8,7 +8,7 @@ from typing import Any from pydantic import BaseModel, ConfigDict, Field -from llama_stack.providers.utils.kvstore.config import KVStoreConfig, SqliteKVStoreConfig +from llama_stack.core.storage.datatypes import KVStoreReference from llama_stack.schema_utils import json_schema_type @@ -17,7 +17,7 @@ class MilvusVectorIOConfig(BaseModel): uri: str = Field(description="The URI of the Milvus server") token: str | None = Field(description="The token of the Milvus server") consistency_level: str = Field(description="The consistency level of the Milvus server", default="Strong") - kvstore: KVStoreConfig = Field(description="Config for KV store backend") + persistence: KVStoreReference = Field(description="Config for KV store backend") # This configuration allows additional fields to be passed through to the underlying Milvus client. # See the [Milvus](https://milvus.io/docs/install-overview.md) documentation for more details about Milvus in general. @@ -28,8 +28,8 @@ class MilvusVectorIOConfig(BaseModel): return { "uri": "${env.MILVUS_ENDPOINT}", "token": "${env.MILVUS_TOKEN}", - "kvstore": SqliteKVStoreConfig.sample_run_config( - __distro_dir__=__distro_dir__, - db_name="milvus_remote_registry.db", - ), + "persistence": KVStoreReference( + backend="kv_default", + namespace="vector_io::milvus_remote", + ).model_dump(exclude_none=True), } diff --git a/llama_stack/providers/remote/vector_io/milvus/milvus.py b/llama_stack/providers/remote/vector_io/milvus/milvus.py index d7147a7f0..d7c34163d 100644 --- a/llama_stack/providers/remote/vector_io/milvus/milvus.py +++ b/llama_stack/providers/remote/vector_io/milvus/milvus.py @@ -14,13 +14,8 @@ from pymilvus import AnnSearchRequest, DataType, Function, FunctionType, MilvusC from llama_stack.apis.common.errors import VectorStoreNotFoundError from llama_stack.apis.files import Files from llama_stack.apis.inference import Inference, InterleavedContent -from llama_stack.apis.models import Models from llama_stack.apis.vector_dbs import VectorDB -from llama_stack.apis.vector_io import ( - Chunk, - QueryChunksResponse, - VectorIO, -) +from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO from llama_stack.log import get_logger from llama_stack.providers.datatypes import VectorDBsProtocolPrivate from llama_stack.providers.inline.vector_io.milvus import MilvusVectorIOConfig as InlineMilvusVectorIOConfig @@ -74,46 +69,23 @@ class MilvusIndex(EmbeddingIndex): logger.info(f"Creating new collection {self.collection_name} with nullable sparse field") # Create schema for vector search schema = self.client.create_schema() - schema.add_field( - field_name="chunk_id", - datatype=DataType.VARCHAR, - is_primary=True, - max_length=100, - ) + schema.add_field(field_name="chunk_id", datatype=DataType.VARCHAR, is_primary=True, max_length=100) schema.add_field( field_name="content", datatype=DataType.VARCHAR, max_length=65535, enable_analyzer=True, # Enable text analysis for BM25 ) - schema.add_field( - field_name="vector", - datatype=DataType.FLOAT_VECTOR, - dim=len(embeddings[0]), - ) - schema.add_field( - field_name="chunk_content", - datatype=DataType.JSON, - ) + schema.add_field(field_name="vector", datatype=DataType.FLOAT_VECTOR, dim=len(embeddings[0])) + schema.add_field(field_name="chunk_content", datatype=DataType.JSON) # Add sparse vector field for BM25 (required by the function) - schema.add_field( - field_name="sparse", - datatype=DataType.SPARSE_FLOAT_VECTOR, - ) + schema.add_field(field_name="sparse", datatype=DataType.SPARSE_FLOAT_VECTOR) # Create indexes index_params = self.client.prepare_index_params() - index_params.add_index( - field_name="vector", - index_type="FLAT", - metric_type="COSINE", - ) + index_params.add_index(field_name="vector", index_type="FLAT", metric_type="COSINE") # Add index for sparse field (required by BM25 function) - index_params.add_index( - field_name="sparse", - index_type="SPARSE_INVERTED_INDEX", - metric_type="BM25", - ) + index_params.add_index(field_name="sparse", index_type="SPARSE_INVERTED_INDEX", metric_type="BM25") # Add BM25 function for full-text search bm25_function = Function( @@ -144,11 +116,7 @@ class MilvusIndex(EmbeddingIndex): } ) try: - await asyncio.to_thread( - self.client.insert, - self.collection_name, - data=data, - ) + await asyncio.to_thread(self.client.insert, self.collection_name, data=data) except Exception as e: logger.error(f"Error inserting chunks into Milvus collection {self.collection_name}: {e}") raise e @@ -167,12 +135,7 @@ class MilvusIndex(EmbeddingIndex): scores = [res["distance"] for res in search_res[0]] return QueryChunksResponse(chunks=chunks, scores=scores) - async def query_keyword( - self, - query_string: str, - k: int, - score_threshold: float, - ) -> QueryChunksResponse: + async def query_keyword(self, query_string: str, k: int, score_threshold: float) -> QueryChunksResponse: """ Perform BM25-based keyword search using Milvus's built-in full-text search. """ @@ -210,12 +173,7 @@ class MilvusIndex(EmbeddingIndex): # Fallback to simple text search return await self._fallback_keyword_search(query_string, k, score_threshold) - async def _fallback_keyword_search( - self, - query_string: str, - k: int, - score_threshold: float, - ) -> QueryChunksResponse: + async def _fallback_keyword_search(self, query_string: str, k: int, score_threshold: float) -> QueryChunksResponse: """ Fallback to simple text search when BM25 search is not available. """ @@ -308,7 +266,6 @@ class MilvusVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP self, config: RemoteMilvusVectorIOConfig | InlineMilvusVectorIOConfig, inference_api: Inference, - models_api: Models, files_api: Files | None, ) -> None: super().__init__(files_api=files_api, kvstore=None) @@ -316,12 +273,11 @@ class MilvusVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP self.cache = {} self.client = None self.inference_api = inference_api - self.models_api = models_api self.vector_db_store = None self.metadata_collection_name = "openai_vector_stores_metadata" async def initialize(self) -> None: - self.kvstore = await kvstore_impl(self.config.kvstore) + self.kvstore = await kvstore_impl(self.config.persistence) start_key = VECTOR_DBS_PREFIX end_key = f"{VECTOR_DBS_PREFIX}\xff" stored_vector_dbs = await self.kvstore.values_in_range(start_key, end_key) @@ -355,10 +311,7 @@ class MilvusVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP # Clean up mixin resources (file batch tasks) await super().shutdown() - async def register_vector_db( - self, - vector_db: VectorDB, - ) -> None: + async def register_vector_db(self, vector_db: VectorDB) -> None: if isinstance(self.config, RemoteMilvusVectorIOConfig): consistency_level = self.config.consistency_level else: @@ -395,12 +348,7 @@ class MilvusVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP await self.cache[vector_db_id].index.delete() del self.cache[vector_db_id] - async def insert_chunks( - self, - vector_db_id: str, - chunks: list[Chunk], - ttl_seconds: int | None = None, - ) -> None: + async def insert_chunks(self, vector_db_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None: index = await self._get_and_cache_vector_db_index(vector_db_id) if not index: raise VectorStoreNotFoundError(vector_db_id) @@ -408,10 +356,7 @@ class MilvusVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP await index.insert_chunks(chunks) async def query_chunks( - self, - vector_db_id: str, - query: InterleavedContent, - params: dict[str, Any] | None = None, + self, vector_db_id: str, query: InterleavedContent, params: dict[str, Any] | None = None ) -> QueryChunksResponse: index = await self._get_and_cache_vector_db_index(vector_db_id) if not index: diff --git a/llama_stack/providers/remote/vector_io/pgvector/__init__.py b/llama_stack/providers/remote/vector_io/pgvector/__init__.py index bb4079ab5..8086b7650 100644 --- a/llama_stack/providers/remote/vector_io/pgvector/__init__.py +++ b/llama_stack/providers/remote/vector_io/pgvector/__init__.py @@ -12,6 +12,6 @@ from .config import PGVectorVectorIOConfig async def get_adapter_impl(config: PGVectorVectorIOConfig, deps: dict[Api, ProviderSpec]): from .pgvector import PGVectorVectorIOAdapter - impl = PGVectorVectorIOAdapter(config, deps[Api.inference], deps[Api.models], deps.get(Api.files, None)) + impl = PGVectorVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files)) await impl.initialize() return impl diff --git a/llama_stack/providers/remote/vector_io/pgvector/config.py b/llama_stack/providers/remote/vector_io/pgvector/config.py index 334cbe5be..d81e524e4 100644 --- a/llama_stack/providers/remote/vector_io/pgvector/config.py +++ b/llama_stack/providers/remote/vector_io/pgvector/config.py @@ -8,10 +8,7 @@ from typing import Any from pydantic import BaseModel, Field -from llama_stack.providers.utils.kvstore.config import ( - KVStoreConfig, - SqliteKVStoreConfig, -) +from llama_stack.core.storage.datatypes import KVStoreReference from llama_stack.schema_utils import json_schema_type @@ -22,7 +19,9 @@ class PGVectorVectorIOConfig(BaseModel): db: str | None = Field(default="postgres") user: str | None = Field(default="postgres") password: str | None = Field(default="mysecretpassword") - kvstore: KVStoreConfig | None = Field(description="Config for KV store backend (SQLite only for now)", default=None) + persistence: KVStoreReference | None = Field( + description="Config for KV store backend (SQLite only for now)", default=None + ) @classmethod def sample_run_config( @@ -41,8 +40,8 @@ class PGVectorVectorIOConfig(BaseModel): "db": db, "user": user, "password": password, - "kvstore": SqliteKVStoreConfig.sample_run_config( - __distro_dir__=__distro_dir__, - db_name="pgvector_registry.db", - ), + "persistence": KVStoreReference( + backend="kv_default", + namespace="vector_io::pgvector", + ).model_dump(exclude_none=True), } diff --git a/llama_stack/providers/remote/vector_io/pgvector/pgvector.py b/llama_stack/providers/remote/vector_io/pgvector/pgvector.py index d55c13103..703a47843 100644 --- a/llama_stack/providers/remote/vector_io/pgvector/pgvector.py +++ b/llama_stack/providers/remote/vector_io/pgvector/pgvector.py @@ -16,26 +16,15 @@ from pydantic import BaseModel, TypeAdapter from llama_stack.apis.common.errors import VectorStoreNotFoundError from llama_stack.apis.files import Files from llama_stack.apis.inference import Inference, InterleavedContent -from llama_stack.apis.models import Models from llama_stack.apis.vector_dbs import VectorDB -from llama_stack.apis.vector_io import ( - Chunk, - QueryChunksResponse, - VectorIO, -) +from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO from llama_stack.log import get_logger from llama_stack.providers.datatypes import VectorDBsProtocolPrivate -from llama_stack.providers.utils.inference.prompt_adapter import ( - interleaved_content_as_str, -) +from llama_stack.providers.utils.inference.prompt_adapter import interleaved_content_as_str from llama_stack.providers.utils.kvstore import kvstore_impl from llama_stack.providers.utils.kvstore.api import KVStore from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin -from llama_stack.providers.utils.memory.vector_store import ( - ChunkForDeletion, - EmbeddingIndex, - VectorDBWithIndex, -) +from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorDBWithIndex from llama_stack.providers.utils.vector_io.vector_utils import WeightedInMemoryAggregator, sanitize_collection_name from .config import PGVectorVectorIOConfig @@ -205,12 +194,7 @@ class PGVectorIndex(EmbeddingIndex): return QueryChunksResponse(chunks=chunks, scores=scores) - async def query_keyword( - self, - query_string: str, - k: int, - score_threshold: float, - ) -> QueryChunksResponse: + async def query_keyword(self, query_string: str, k: int, score_threshold: float) -> QueryChunksResponse: """ Performs keyword-based search using PostgreSQL's full-text search with ts_rank scoring. @@ -317,7 +301,7 @@ class PGVectorIndex(EmbeddingIndex): """Remove a chunk from the PostgreSQL table.""" chunk_ids = [c.chunk_id for c in chunks_for_deletion] with self.conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(f"DELETE FROM {self.table_name} WHERE id = ANY(%s)", (chunk_ids,)) + cur.execute(f"DELETE FROM {self.table_name} WHERE id = ANY(%s)", (chunk_ids)) def get_pgvector_search_function(self) -> str: return self.PGVECTOR_DISTANCE_METRIC_TO_SEARCH_FUNCTION[self.distance_metric] @@ -341,16 +325,11 @@ class PGVectorIndex(EmbeddingIndex): class PGVectorVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPrivate): def __init__( - self, - config: PGVectorVectorIOConfig, - inference_api: Inference, - models_api: Models, - files_api: Files | None = None, + self, config: PGVectorVectorIOConfig, inference_api: Inference, files_api: Files | None = None ) -> None: super().__init__(files_api=files_api, kvstore=None) self.config = config self.inference_api = inference_api - self.models_api = models_api self.conn = None self.cache = {} self.vector_db_store = None @@ -358,7 +337,7 @@ class PGVectorVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtoco async def initialize(self) -> None: log.info(f"Initializing PGVector memory adapter with config: {self.config}") - self.kvstore = await kvstore_impl(self.config.kvstore) + self.kvstore = await kvstore_impl(self.config.persistence) await self.initialize_openai_vector_stores() try: @@ -407,11 +386,7 @@ class PGVectorVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtoco vector_db=vector_db, dimension=vector_db.embedding_dimension, conn=self.conn, kvstore=self.kvstore ) await pgvector_index.initialize() - index = VectorDBWithIndex( - vector_db, - index=pgvector_index, - inference_api=self.inference_api, - ) + index = VectorDBWithIndex(vector_db, index=pgvector_index, inference_api=self.inference_api) self.cache[vector_db.identifier] = index async def unregister_vector_db(self, vector_db_id: str) -> None: @@ -424,20 +399,12 @@ class PGVectorVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtoco assert self.kvstore is not None await self.kvstore.delete(key=f"{VECTOR_DBS_PREFIX}{vector_db_id}") - async def insert_chunks( - self, - vector_db_id: str, - chunks: list[Chunk], - ttl_seconds: int | None = None, - ) -> None: + async def insert_chunks(self, vector_db_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None: index = await self._get_and_cache_vector_db_index(vector_db_id) await index.insert_chunks(chunks) async def query_chunks( - self, - vector_db_id: str, - query: InterleavedContent, - params: dict[str, Any] | None = None, + self, vector_db_id: str, query: InterleavedContent, params: dict[str, Any] | None = None ) -> QueryChunksResponse: index = await self._get_and_cache_vector_db_index(vector_db_id) return await index.query_chunks(query, params) diff --git a/llama_stack/providers/remote/vector_io/qdrant/__init__.py b/llama_stack/providers/remote/vector_io/qdrant/__init__.py index c4942fbce..e9527f101 100644 --- a/llama_stack/providers/remote/vector_io/qdrant/__init__.py +++ b/llama_stack/providers/remote/vector_io/qdrant/__init__.py @@ -12,11 +12,6 @@ from .config import QdrantVectorIOConfig async def get_adapter_impl(config: QdrantVectorIOConfig, deps: dict[Api, ProviderSpec]): from .qdrant import QdrantVectorIOAdapter - impl = QdrantVectorIOAdapter( - config, - deps[Api.inference], - deps[Api.models], - deps.get(Api.files), - ) + impl = QdrantVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files)) await impl.initialize() return impl diff --git a/llama_stack/providers/remote/vector_io/qdrant/config.py b/llama_stack/providers/remote/vector_io/qdrant/config.py index ff5506236..01fbcc5cb 100644 --- a/llama_stack/providers/remote/vector_io/qdrant/config.py +++ b/llama_stack/providers/remote/vector_io/qdrant/config.py @@ -8,10 +8,7 @@ from typing import Any from pydantic import BaseModel -from llama_stack.providers.utils.kvstore.config import ( - KVStoreConfig, - SqliteKVStoreConfig, -) +from llama_stack.core.storage.datatypes import KVStoreReference from llama_stack.schema_utils import json_schema_type @@ -27,14 +24,14 @@ class QdrantVectorIOConfig(BaseModel): prefix: str | None = None timeout: int | None = None host: str | None = None - kvstore: KVStoreConfig + persistence: KVStoreReference @classmethod def sample_run_config(cls, __distro_dir__: str, **kwargs: Any) -> dict[str, Any]: return { "api_key": "${env.QDRANT_API_KEY:=}", - "kvstore": SqliteKVStoreConfig.sample_run_config( - __distro_dir__=__distro_dir__, - db_name="qdrant_registry.db", - ), + "persistence": KVStoreReference( + backend="kv_default", + namespace="vector_io::qdrant_remote", + ).model_dump(exclude_none=True), } diff --git a/llama_stack/providers/remote/vector_io/qdrant/qdrant.py b/llama_stack/providers/remote/vector_io/qdrant/qdrant.py index 8b90935cd..6838d69e9 100644 --- a/llama_stack/providers/remote/vector_io/qdrant/qdrant.py +++ b/llama_stack/providers/remote/vector_io/qdrant/qdrant.py @@ -16,7 +16,6 @@ from qdrant_client.models import PointStruct from llama_stack.apis.common.errors import VectorStoreNotFoundError from llama_stack.apis.files import Files from llama_stack.apis.inference import Inference, InterleavedContent -from llama_stack.apis.models import Models from llama_stack.apis.vector_dbs import VectorDB from llama_stack.apis.vector_io import ( Chunk, @@ -30,11 +29,7 @@ from llama_stack.providers.datatypes import VectorDBsProtocolPrivate from llama_stack.providers.inline.vector_io.qdrant import QdrantVectorIOConfig as InlineQdrantVectorIOConfig from llama_stack.providers.utils.kvstore import kvstore_impl from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin -from llama_stack.providers.utils.memory.vector_store import ( - ChunkForDeletion, - EmbeddingIndex, - VectorDBWithIndex, -) +from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorDBWithIndex from .config import QdrantVectorIOConfig as RemoteQdrantVectorIOConfig @@ -99,8 +94,7 @@ class QdrantIndex(EmbeddingIndex): chunk_ids = [convert_id(c.chunk_id) for c in chunks_for_deletion] try: await self.client.delete( - collection_name=self.collection_name, - points_selector=models.PointIdsList(points=chunk_ids), + collection_name=self.collection_name, points_selector=models.PointIdsList(points=chunk_ids) ) except Exception as e: log.error(f"Error deleting chunks from Qdrant collection {self.collection_name}: {e}") @@ -133,12 +127,7 @@ class QdrantIndex(EmbeddingIndex): return QueryChunksResponse(chunks=chunks, scores=scores) - async def query_keyword( - self, - query_string: str, - k: int, - score_threshold: float, - ) -> QueryChunksResponse: + async def query_keyword(self, query_string: str, k: int, score_threshold: float) -> QueryChunksResponse: raise NotImplementedError("Keyword search is not supported in Qdrant") async def query_hybrid( @@ -161,7 +150,6 @@ class QdrantVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP self, config: RemoteQdrantVectorIOConfig | InlineQdrantVectorIOConfig, inference_api: Inference, - models_api: Models, files_api: Files | None = None, ) -> None: super().__init__(files_api=files_api, kvstore=None) @@ -169,14 +157,13 @@ class QdrantVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP self.client: AsyncQdrantClient = None self.cache = {} self.inference_api = inference_api - self.models_api = models_api self.vector_db_store = None self._qdrant_lock = asyncio.Lock() async def initialize(self) -> None: - client_config = self.config.model_dump(exclude_none=True, exclude={"kvstore"}) + client_config = self.config.model_dump(exclude_none=True, exclude={"persistence"}) self.client = AsyncQdrantClient(**client_config) - self.kvstore = await kvstore_impl(self.config.kvstore) + self.kvstore = await kvstore_impl(self.config.persistence) start_key = VECTOR_DBS_PREFIX end_key = f"{VECTOR_DBS_PREFIX}\xff" @@ -184,11 +171,7 @@ class QdrantVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP for vector_db_data in stored_vector_dbs: vector_db = VectorDB.model_validate_json(vector_db_data) - index = VectorDBWithIndex( - vector_db, - QdrantIndex(self.client, vector_db.identifier), - self.inference_api, - ) + index = VectorDBWithIndex(vector_db, QdrantIndex(self.client, vector_db.identifier), self.inference_api) self.cache[vector_db.identifier] = index self.openai_vector_stores = await self._load_openai_vector_stores() @@ -197,18 +180,13 @@ class QdrantVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP # Clean up mixin resources (file batch tasks) await super().shutdown() - async def register_vector_db( - self, - vector_db: VectorDB, - ) -> None: + async def register_vector_db(self, vector_db: VectorDB) -> None: assert self.kvstore is not None key = f"{VECTOR_DBS_PREFIX}{vector_db.identifier}" await self.kvstore.set(key=key, value=vector_db.model_dump_json()) index = VectorDBWithIndex( - vector_db=vector_db, - index=QdrantIndex(self.client, vector_db.identifier), - inference_api=self.inference_api, + vector_db=vector_db, index=QdrantIndex(self.client, vector_db.identifier), inference_api=self.inference_api ) self.cache[vector_db.identifier] = index @@ -240,12 +218,7 @@ class QdrantVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP self.cache[vector_db_id] = index return index - async def insert_chunks( - self, - vector_db_id: str, - chunks: list[Chunk], - ttl_seconds: int | None = None, - ) -> None: + async def insert_chunks(self, vector_db_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None: index = await self._get_and_cache_vector_db_index(vector_db_id) if not index: raise VectorStoreNotFoundError(vector_db_id) @@ -253,10 +226,7 @@ class QdrantVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP await index.insert_chunks(chunks) async def query_chunks( - self, - vector_db_id: str, - query: InterleavedContent, - params: dict[str, Any] | None = None, + self, vector_db_id: str, query: InterleavedContent, params: dict[str, Any] | None = None ) -> QueryChunksResponse: index = await self._get_and_cache_vector_db_index(vector_db_id) if not index: diff --git a/llama_stack/providers/remote/vector_io/weaviate/__init__.py b/llama_stack/providers/remote/vector_io/weaviate/__init__.py index 2040dad96..12e11d013 100644 --- a/llama_stack/providers/remote/vector_io/weaviate/__init__.py +++ b/llama_stack/providers/remote/vector_io/weaviate/__init__.py @@ -12,11 +12,6 @@ from .config import WeaviateVectorIOConfig async def get_adapter_impl(config: WeaviateVectorIOConfig, deps: dict[Api, ProviderSpec]): from .weaviate import WeaviateVectorIOAdapter - impl = WeaviateVectorIOAdapter( - config, - deps[Api.inference], - deps[Api.models], - deps.get(Api.files), - ) + impl = WeaviateVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files)) await impl.initialize() return impl diff --git a/llama_stack/providers/remote/vector_io/weaviate/config.py b/llama_stack/providers/remote/vector_io/weaviate/config.py index b693e294e..66dbf1fed 100644 --- a/llama_stack/providers/remote/vector_io/weaviate/config.py +++ b/llama_stack/providers/remote/vector_io/weaviate/config.py @@ -8,10 +8,7 @@ from typing import Any from pydantic import BaseModel, Field -from llama_stack.providers.utils.kvstore.config import ( - KVStoreConfig, - SqliteKVStoreConfig, -) +from llama_stack.core.storage.datatypes import KVStoreReference from llama_stack.schema_utils import json_schema_type @@ -19,19 +16,17 @@ from llama_stack.schema_utils import json_schema_type class WeaviateVectorIOConfig(BaseModel): weaviate_api_key: str | None = Field(description="The API key for the Weaviate instance", default=None) weaviate_cluster_url: str | None = Field(description="The URL of the Weaviate cluster", default="localhost:8080") - kvstore: KVStoreConfig | None = Field(description="Config for KV store backend (SQLite only for now)", default=None) + persistence: KVStoreReference | None = Field( + description="Config for KV store backend (SQLite only for now)", default=None + ) @classmethod - def sample_run_config( - cls, - __distro_dir__: str, - **kwargs: Any, - ) -> dict[str, Any]: + def sample_run_config(cls, __distro_dir__: str, **kwargs: Any) -> dict[str, Any]: return { "weaviate_api_key": None, "weaviate_cluster_url": "${env.WEAVIATE_CLUSTER_URL:=localhost:8080}", - "kvstore": SqliteKVStoreConfig.sample_run_config( - __distro_dir__=__distro_dir__, - db_name="weaviate_registry.db", - ), + "persistence": KVStoreReference( + backend="kv_default", + namespace="vector_io::weaviate", + ).model_dump(exclude_none=True), } diff --git a/llama_stack/providers/remote/vector_io/weaviate/weaviate.py b/llama_stack/providers/remote/vector_io/weaviate/weaviate.py index d8b11c441..8e7eb7267 100644 --- a/llama_stack/providers/remote/vector_io/weaviate/weaviate.py +++ b/llama_stack/providers/remote/vector_io/weaviate/weaviate.py @@ -16,7 +16,6 @@ from llama_stack.apis.common.content_types import InterleavedContent from llama_stack.apis.common.errors import VectorStoreNotFoundError from llama_stack.apis.files import Files from llama_stack.apis.inference import Inference -from llama_stack.apis.models import Models from llama_stack.apis.vector_dbs import VectorDB from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO from llama_stack.core.request_headers import NeedsRequestProviderData @@ -24,9 +23,7 @@ from llama_stack.log import get_logger from llama_stack.providers.datatypes import VectorDBsProtocolPrivate from llama_stack.providers.utils.kvstore import kvstore_impl from llama_stack.providers.utils.kvstore.api import KVStore -from llama_stack.providers.utils.memory.openai_vector_store_mixin import ( - OpenAIVectorStoreMixin, -) +from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin from llama_stack.providers.utils.memory.vector_store import ( RERANKER_TYPE_RRF, ChunkForDeletion, @@ -48,12 +45,7 @@ OPENAI_VECTOR_STORES_FILES_CONTENTS_PREFIX = f"openai_vector_stores_files_conten class WeaviateIndex(EmbeddingIndex): - def __init__( - self, - client: weaviate.WeaviateClient, - collection_name: str, - kvstore: KVStore | None = None, - ): + def __init__(self, client: weaviate.WeaviateClient, collection_name: str, kvstore: KVStore | None = None): self.client = client self.collection_name = sanitize_collection_name(collection_name, weaviate_format=True) self.kvstore = kvstore @@ -108,9 +100,7 @@ class WeaviateIndex(EmbeddingIndex): try: results = collection.query.near_vector( - near_vector=embedding.tolist(), - limit=k, - return_metadata=wvc.query.MetadataQuery(distance=True), + near_vector=embedding.tolist(), limit=k, return_metadata=wvc.query.MetadataQuery(distance=True) ) except Exception as e: log.error(f"Weaviate client vector search failed: {e}") @@ -153,12 +143,7 @@ class WeaviateIndex(EmbeddingIndex): collection = self.client.collections.get(sanitized_collection_name) collection.data.delete_many(where=Filter.by_property("id").contains_any(chunk_ids)) - async def query_keyword( - self, - query_string: str, - k: int, - score_threshold: float, - ) -> QueryChunksResponse: + async def query_keyword(self, query_string: str, k: int, score_threshold: float) -> QueryChunksResponse: """ Performs BM25-based keyword search using Weaviate's built-in full-text search. Args: @@ -175,9 +160,7 @@ class WeaviateIndex(EmbeddingIndex): # Perform BM25 keyword search on chunk_content field try: results = collection.query.bm25( - query=query_string, - limit=k, - return_metadata=wvc.query.MetadataQuery(score=True), + query=query_string, limit=k, return_metadata=wvc.query.MetadataQuery(score=True) ) except Exception as e: log.error(f"Weaviate client keyword search failed: {e}") @@ -274,23 +257,11 @@ class WeaviateIndex(EmbeddingIndex): return QueryChunksResponse(chunks=chunks, scores=scores) -class WeaviateVectorIOAdapter( - OpenAIVectorStoreMixin, - VectorIO, - NeedsRequestProviderData, - VectorDBsProtocolPrivate, -): - def __init__( - self, - config: WeaviateVectorIOConfig, - inference_api: Inference, - models_api: Models, - files_api: Files | None, - ) -> None: +class WeaviateVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, NeedsRequestProviderData, VectorDBsProtocolPrivate): + def __init__(self, config: WeaviateVectorIOConfig, inference_api: Inference, files_api: Files | None) -> None: super().__init__(files_api=files_api, kvstore=None) self.config = config self.inference_api = inference_api - self.models_api = models_api self.client_cache = {} self.cache = {} self.vector_db_store = None @@ -301,10 +272,7 @@ class WeaviateVectorIOAdapter( log.info("Using Weaviate locally in container") host, port = self.config.weaviate_cluster_url.split(":") key = "local_test" - client = weaviate.connect_to_local( - host=host, - port=port, - ) + client = weaviate.connect_to_local(host=host, port=port) else: log.info("Using Weaviate remote cluster with URL") key = f"{self.config.weaviate_cluster_url}::{self.config.weaviate_api_key}" @@ -320,8 +288,8 @@ class WeaviateVectorIOAdapter( async def initialize(self) -> None: """Set up KV store and load existing vector DBs and OpenAI vector stores.""" # Initialize KV store for metadata if configured - if self.config.kvstore is not None: - self.kvstore = await kvstore_impl(self.config.kvstore) + if self.config.persistence is not None: + self.kvstore = await kvstore_impl(self.config.persistence) else: self.kvstore = None log.info("No kvstore configured, registry will not persist across restarts") @@ -334,15 +302,9 @@ class WeaviateVectorIOAdapter( for raw in stored: vector_db = VectorDB.model_validate_json(raw) client = self._get_client() - idx = WeaviateIndex( - client=client, - collection_name=vector_db.identifier, - kvstore=self.kvstore, - ) + idx = WeaviateIndex(client=client, collection_name=vector_db.identifier, kvstore=self.kvstore) self.cache[vector_db.identifier] = VectorDBWithIndex( - vector_db=vector_db, - index=idx, - inference_api=self.inference_api, + vector_db=vector_db, index=idx, inference_api=self.inference_api ) # Load OpenAI vector stores metadata into cache @@ -354,10 +316,7 @@ class WeaviateVectorIOAdapter( # Clean up mixin resources (file batch tasks) await super().shutdown() - async def register_vector_db( - self, - vector_db: VectorDB, - ) -> None: + async def register_vector_db(self, vector_db: VectorDB) -> None: client = self._get_client() sanitized_collection_name = sanitize_collection_name(vector_db.identifier, weaviate_format=True) # Create collection if it doesn't exist @@ -366,17 +325,12 @@ class WeaviateVectorIOAdapter( name=sanitized_collection_name, vectorizer_config=wvc.config.Configure.Vectorizer.none(), properties=[ - wvc.config.Property( - name="chunk_content", - data_type=wvc.config.DataType.TEXT, - ), + wvc.config.Property(name="chunk_content", data_type=wvc.config.DataType.TEXT), ], ) self.cache[vector_db.identifier] = VectorDBWithIndex( - vector_db, - WeaviateIndex(client=client, collection_name=sanitized_collection_name), - self.inference_api, + vector_db, WeaviateIndex(client=client, collection_name=sanitized_collection_name), self.inference_api ) async def unregister_vector_db(self, vector_db_id: str) -> None: @@ -412,12 +366,7 @@ class WeaviateVectorIOAdapter( self.cache[vector_db_id] = index return index - async def insert_chunks( - self, - vector_db_id: str, - chunks: list[Chunk], - ttl_seconds: int | None = None, - ) -> None: + async def insert_chunks(self, vector_db_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None: index = await self._get_and_cache_vector_db_index(vector_db_id) if not index: raise VectorStoreNotFoundError(vector_db_id) @@ -425,10 +374,7 @@ class WeaviateVectorIOAdapter( await index.insert_chunks(chunks) async def query_chunks( - self, - vector_db_id: str, - query: InterleavedContent, - params: dict[str, Any] | None = None, + self, vector_db_id: str, query: InterleavedContent, params: dict[str, Any] | None = None ) -> QueryChunksResponse: index = await self._get_and_cache_vector_db_index(vector_db_id) if not index: diff --git a/llama_stack/providers/utils/inference/inference_store.py b/llama_stack/providers/utils/inference/inference_store.py index 901f77c67..8e20bca6b 100644 --- a/llama_stack/providers/utils/inference/inference_store.py +++ b/llama_stack/providers/utils/inference/inference_store.py @@ -15,12 +15,13 @@ from llama_stack.apis.inference import ( OpenAIMessageParam, Order, ) -from llama_stack.core.datatypes import AccessRule, InferenceStoreConfig +from llama_stack.core.datatypes import AccessRule +from llama_stack.core.storage.datatypes import InferenceStoreReference, StorageBackendType from llama_stack.log import get_logger from ..sqlstore.api import ColumnDefinition, ColumnType from ..sqlstore.authorized_sqlstore import AuthorizedSqlStore -from ..sqlstore.sqlstore import SqlStoreConfig, SqlStoreType, sqlstore_impl +from ..sqlstore.sqlstore import _SQLSTORE_BACKENDS, sqlstore_impl logger = get_logger(name=__name__, category="inference") @@ -28,33 +29,32 @@ logger = get_logger(name=__name__, category="inference") class InferenceStore: def __init__( self, - config: InferenceStoreConfig | SqlStoreConfig, + reference: InferenceStoreReference, policy: list[AccessRule], ): - # Handle backward compatibility - if not isinstance(config, InferenceStoreConfig): - # Legacy: SqlStoreConfig passed directly as config - config = InferenceStoreConfig( - sql_store_config=config, - ) - - self.config = config - self.sql_store_config = config.sql_store_config + self.reference = reference self.sql_store = None self.policy = policy - # Disable write queue for SQLite to avoid concurrency issues - self.enable_write_queue = self.sql_store_config.type != SqlStoreType.sqlite - # Async write queue and worker control self._queue: asyncio.Queue[tuple[OpenAIChatCompletion, list[OpenAIMessageParam]]] | None = None self._worker_tasks: list[asyncio.Task[Any]] = [] - self._max_write_queue_size: int = config.max_write_queue_size - self._num_writers: int = max(1, config.num_writers) + self._max_write_queue_size: int = reference.max_write_queue_size + self._num_writers: int = max(1, reference.num_writers) async def initialize(self): """Create the necessary tables if they don't exist.""" - self.sql_store = AuthorizedSqlStore(sqlstore_impl(self.sql_store_config), self.policy) + base_store = sqlstore_impl(self.reference) + self.sql_store = AuthorizedSqlStore(base_store, self.policy) + + # Disable write queue for SQLite to avoid concurrency issues + backend_name = self.reference.backend + backend_config = _SQLSTORE_BACKENDS.get(backend_name) + if backend_config is None: + raise ValueError( + f"Unregistered SQL backend '{backend_name}'. Registered backends: {sorted(_SQLSTORE_BACKENDS)}" + ) + self.enable_write_queue = backend_config.type != StorageBackendType.SQL_SQLITE await self.sql_store.create_table( "chat_completions", { diff --git a/llama_stack/providers/utils/kvstore/config.py b/llama_stack/providers/utils/kvstore/config.py index 7b6a79350..c0582abc4 100644 --- a/llama_stack/providers/utils/kvstore/config.py +++ b/llama_stack/providers/utils/kvstore/config.py @@ -4,143 +4,20 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -import re -from enum import Enum -from typing import Annotated, Literal +from typing import Annotated -from pydantic import BaseModel, Field, field_validator - -from llama_stack.core.utils.config_dirs import RUNTIME_BASE_DIR - - -class KVStoreType(Enum): - redis = "redis" - sqlite = "sqlite" - postgres = "postgres" - mongodb = "mongodb" - - -class CommonConfig(BaseModel): - namespace: str | None = Field( - default=None, - description="All keys will be prefixed with this namespace", - ) - - -class RedisKVStoreConfig(CommonConfig): - type: Literal["redis"] = KVStoreType.redis.value - host: str = "localhost" - port: int = 6379 - - @property - def url(self) -> str: - return f"redis://{self.host}:{self.port}" - - @classmethod - def pip_packages(cls) -> list[str]: - return ["redis"] - - @classmethod - def sample_run_config(cls): - return { - "type": "redis", - "host": "${env.REDIS_HOST:=localhost}", - "port": "${env.REDIS_PORT:=6379}", - } - - -class SqliteKVStoreConfig(CommonConfig): - type: Literal["sqlite"] = KVStoreType.sqlite.value - db_path: str = Field( - default=(RUNTIME_BASE_DIR / "kvstore.db").as_posix(), - description="File path for the sqlite database", - ) - - @classmethod - def pip_packages(cls) -> list[str]: - return ["aiosqlite"] - - @classmethod - def sample_run_config(cls, __distro_dir__: str, db_name: str = "kvstore.db"): - return { - "type": "sqlite", - "db_path": "${env.SQLITE_STORE_DIR:=" + __distro_dir__ + "}/" + db_name, - } - - -class PostgresKVStoreConfig(CommonConfig): - type: Literal["postgres"] = KVStoreType.postgres.value - host: str = "localhost" - port: int = 5432 - db: str = "llamastack" - user: str - password: str | None = None - ssl_mode: str | None = None - ca_cert_path: str | None = None - table_name: str = "llamastack_kvstore" - - @classmethod - def sample_run_config(cls, table_name: str = "llamastack_kvstore", **kwargs): - return { - "type": "postgres", - "host": "${env.POSTGRES_HOST:=localhost}", - "port": "${env.POSTGRES_PORT:=5432}", - "db": "${env.POSTGRES_DB:=llamastack}", - "user": "${env.POSTGRES_USER:=llamastack}", - "password": "${env.POSTGRES_PASSWORD:=llamastack}", - "table_name": "${env.POSTGRES_TABLE_NAME:=" + table_name + "}", - } - - @classmethod - @field_validator("table_name") - def validate_table_name(cls, v: str) -> str: - # PostgreSQL identifiers rules: - # - Must start with a letter or underscore - # - Can contain letters, numbers, and underscores - # - Maximum length is 63 bytes - pattern = r"^[a-zA-Z_][a-zA-Z0-9_]*$" - if not re.match(pattern, v): - raise ValueError( - "Invalid table name. Must start with letter or underscore and contain only letters, numbers, and underscores" - ) - if len(v) > 63: - raise ValueError("Table name must be less than 63 characters") - return v - - @classmethod - def pip_packages(cls) -> list[str]: - return ["psycopg2-binary"] - - -class MongoDBKVStoreConfig(CommonConfig): - type: Literal["mongodb"] = KVStoreType.mongodb.value - host: str = "localhost" - port: int = 27017 - db: str = "llamastack" - user: str | None = None - password: str | None = None - collection_name: str = "llamastack_kvstore" - - @classmethod - def pip_packages(cls) -> list[str]: - return ["pymongo"] - - @classmethod - def sample_run_config(cls, collection_name: str = "llamastack_kvstore"): - return { - "type": "mongodb", - "host": "${env.MONGODB_HOST:=localhost}", - "port": "${env.MONGODB_PORT:=5432}", - "db": "${env.MONGODB_DB}", - "user": "${env.MONGODB_USER}", - "password": "${env.MONGODB_PASSWORD}", - "collection_name": "${env.MONGODB_COLLECTION_NAME:=" + collection_name + "}", - } +from pydantic import Field +from llama_stack.core.storage.datatypes import ( + MongoDBKVStoreConfig, + PostgresKVStoreConfig, + RedisKVStoreConfig, + SqliteKVStoreConfig, + StorageBackendType, +) KVStoreConfig = Annotated[ - RedisKVStoreConfig | SqliteKVStoreConfig | PostgresKVStoreConfig | MongoDBKVStoreConfig, - Field(discriminator="type", default=KVStoreType.sqlite.value), + RedisKVStoreConfig | SqliteKVStoreConfig | PostgresKVStoreConfig | MongoDBKVStoreConfig, Field(discriminator="type") ] @@ -148,13 +25,13 @@ def get_pip_packages(store_config: dict | KVStoreConfig) -> list[str]: """Get pip packages for KV store config, handling both dict and object cases.""" if isinstance(store_config, dict): store_type = store_config.get("type") - if store_type == "sqlite": + if store_type == StorageBackendType.KV_SQLITE.value: return SqliteKVStoreConfig.pip_packages() - elif store_type == "postgres": + elif store_type == StorageBackendType.KV_POSTGRES.value: return PostgresKVStoreConfig.pip_packages() - elif store_type == "redis": + elif store_type == StorageBackendType.KV_REDIS.value: return RedisKVStoreConfig.pip_packages() - elif store_type == "mongodb": + elif store_type == StorageBackendType.KV_MONGODB.value: return MongoDBKVStoreConfig.pip_packages() else: raise ValueError(f"Unknown KV store type: {store_type}") diff --git a/llama_stack/providers/utils/kvstore/kvstore.py b/llama_stack/providers/utils/kvstore/kvstore.py index 426523d8e..eee51e5d9 100644 --- a/llama_stack/providers/utils/kvstore/kvstore.py +++ b/llama_stack/providers/utils/kvstore/kvstore.py @@ -4,9 +4,17 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from __future__ import annotations + +from llama_stack.core.storage.datatypes import KVStoreReference, StorageBackendConfig, StorageBackendType from .api import KVStore -from .config import KVStoreConfig, KVStoreType +from .config import KVStoreConfig def kvstore_dependencies(): @@ -44,20 +52,41 @@ class InmemoryKVStoreImpl(KVStore): del self._store[key] -async def kvstore_impl(config: KVStoreConfig) -> KVStore: - if config.type == KVStoreType.redis.value: +_KVSTORE_BACKENDS: dict[str, KVStoreConfig] = {} + + +def register_kvstore_backends(backends: dict[str, StorageBackendConfig]) -> None: + """Register the set of available KV store backends for reference resolution.""" + global _KVSTORE_BACKENDS + + _KVSTORE_BACKENDS.clear() + for name, cfg in backends.items(): + _KVSTORE_BACKENDS[name] = cfg + + +async def kvstore_impl(reference: KVStoreReference) -> KVStore: + backend_name = reference.backend + + backend_config = _KVSTORE_BACKENDS.get(backend_name) + if backend_config is None: + raise ValueError(f"Unknown KVStore backend '{backend_name}'. Registered backends: {sorted(_KVSTORE_BACKENDS)}") + + config = backend_config.model_copy() + config.namespace = reference.namespace + + if config.type == StorageBackendType.KV_REDIS.value: from .redis import RedisKVStoreImpl impl = RedisKVStoreImpl(config) - elif config.type == KVStoreType.sqlite.value: + elif config.type == StorageBackendType.KV_SQLITE.value: from .sqlite import SqliteKVStoreImpl impl = SqliteKVStoreImpl(config) - elif config.type == KVStoreType.postgres.value: + elif config.type == StorageBackendType.KV_POSTGRES.value: from .postgres import PostgresKVStoreImpl impl = PostgresKVStoreImpl(config) - elif config.type == KVStoreType.mongodb.value: + elif config.type == StorageBackendType.KV_MONGODB.value: from .mongodb import MongoDBKVStoreImpl impl = MongoDBKVStoreImpl(config) diff --git a/llama_stack/providers/utils/memory/openai_vector_store_mixin.py b/llama_stack/providers/utils/memory/openai_vector_store_mixin.py index 0e550434e..7806d98c1 100644 --- a/llama_stack/providers/utils/memory/openai_vector_store_mixin.py +++ b/llama_stack/providers/utils/memory/openai_vector_store_mixin.py @@ -17,7 +17,6 @@ from pydantic import TypeAdapter from llama_stack.apis.common.errors import VectorStoreNotFoundError from llama_stack.apis.files import Files, OpenAIFileObject -from llama_stack.apis.models import Model, Models from llama_stack.apis.vector_dbs import VectorDB from llama_stack.apis.vector_io import ( Chunk, @@ -81,13 +80,14 @@ class OpenAIVectorStoreMixin(ABC): # Implementing classes should call super().__init__() in their __init__ method # to properly initialize the mixin attributes. def __init__( - self, files_api: Files | None = None, kvstore: KVStore | None = None, models_api: Models | None = None + self, + files_api: Files | None = None, + kvstore: KVStore | None = None, ): self.openai_vector_stores: dict[str, dict[str, Any]] = {} self.openai_file_batches: dict[str, dict[str, Any]] = {} self.files_api = files_api self.kvstore = kvstore - self.models_api = models_api self._last_file_batch_cleanup_time = 0 self._file_batch_tasks: dict[str, asyncio.Task[None]] = {} @@ -393,21 +393,7 @@ class OpenAIVectorStoreMixin(ABC): vector_db_id = provider_vector_db_id or generate_object_id("vector_store", lambda: f"vs_{uuid.uuid4()}") if embedding_model is None: - result = await self._get_default_embedding_model_and_dimension() - if result is None: - raise ValueError( - "embedding_model is required in extra_body when creating a vector store. " - "No default embedding model could be determined automatically." - ) - embedding_model, embedding_dimension = result - elif embedding_dimension is None: - # Embedding model was provided but dimension wasn't, look it up - embedding_dimension = await self._get_embedding_dimension_for_model(embedding_model) - if embedding_dimension is None: - raise ValueError( - f"Could not determine embedding dimension for model '{embedding_model}'. " - "Please provide embedding_dimension in extra_body or ensure the model metadata contains embedding_dimension." - ) + raise ValueError("embedding_model is required") if embedding_dimension is None: raise ValueError("Embedding dimension is required") @@ -474,85 +460,6 @@ class OpenAIVectorStoreMixin(ABC): store_info = self.openai_vector_stores[vector_db_id] return VectorStoreObject.model_validate(store_info) - async def _get_embedding_models(self) -> list[Model]: - """Get list of embedding models from the models API.""" - if not self.models_api: - return [] - - models_response = await self.models_api.list_models() - models_list = models_response.data if hasattr(models_response, "data") else models_response - - embedding_models = [] - for model in models_list: - if not isinstance(model, Model): - logger.warning(f"Non-Model object found in models list: {type(model)} - {model}") - continue - if model.model_type == "embedding": - embedding_models.append(model) - - return embedding_models - - async def _get_embedding_dimension_for_model(self, model_id: str) -> int | None: - """Get embedding dimension for a specific model by looking it up in the models API. - - Args: - model_id: The identifier of the embedding model (supports both prefixed and non-prefixed) - - Returns: - The embedding dimension for the model, or None if not found - """ - embedding_models = await self._get_embedding_models() - - for model in embedding_models: - # Check for exact match first - if model.identifier == model_id: - embedding_dimension = model.metadata.get("embedding_dimension") - if embedding_dimension is not None: - return int(embedding_dimension) - else: - logger.warning(f"Model {model_id} found but has no embedding_dimension in metadata") - return None - - # Check for prefixed/unprefixed variations - # If model_id is unprefixed, check if it matches the resource_id - if model.provider_resource_id == model_id: - embedding_dimension = model.metadata.get("embedding_dimension") - if embedding_dimension is not None: - return int(embedding_dimension) - - return None - - async def _get_default_embedding_model_and_dimension(self) -> tuple[str, int] | None: - """Get default embedding model from the models API. - - Looks for embedding models marked with default_configured=True in metadata. - Returns None if no default embedding model is found. - Raises ValueError if multiple defaults are found. - """ - embedding_models = await self._get_embedding_models() - - default_models = [] - for model in embedding_models: - if model.metadata.get("default_configured") is True: - default_models.append(model.identifier) - - if len(default_models) > 1: - raise ValueError( - f"Multiple embedding models marked as default_configured=True: {default_models}. " - "Only one embedding model can be marked as default." - ) - - if default_models: - model_id = default_models[0] - embedding_dimension = await self._get_embedding_dimension_for_model(model_id) - if embedding_dimension is None: - raise ValueError(f"Embedding model '{model_id}' has no embedding_dimension in metadata") - logger.info(f"Using default embedding model: {model_id} with dimension {embedding_dimension}") - return model_id, embedding_dimension - - logger.debug("No default embedding models found") - return None - async def openai_list_vector_stores( self, limit: int | None = 20, diff --git a/llama_stack/providers/utils/responses/responses_store.py b/llama_stack/providers/utils/responses/responses_store.py index 36370b492..d5c243252 100644 --- a/llama_stack/providers/utils/responses/responses_store.py +++ b/llama_stack/providers/utils/responses/responses_store.py @@ -18,13 +18,13 @@ from llama_stack.apis.agents.openai_responses import ( OpenAIResponseObjectWithInput, ) from llama_stack.apis.inference import OpenAIMessageParam -from llama_stack.core.datatypes import AccessRule, ResponsesStoreConfig -from llama_stack.core.utils.config_dirs import RUNTIME_BASE_DIR +from llama_stack.core.datatypes import AccessRule +from llama_stack.core.storage.datatypes import ResponsesStoreReference, SqlStoreReference, StorageBackendType from llama_stack.log import get_logger from ..sqlstore.api import ColumnDefinition, ColumnType from ..sqlstore.authorized_sqlstore import AuthorizedSqlStore -from ..sqlstore.sqlstore import SqliteSqlStoreConfig, SqlStoreConfig, SqlStoreType, sqlstore_impl +from ..sqlstore.sqlstore import _SQLSTORE_BACKENDS, sqlstore_impl logger = get_logger(name=__name__, category="openai_responses") @@ -45,39 +45,38 @@ class _OpenAIResponseObjectWithInputAndMessages(OpenAIResponseObjectWithInput): class ResponsesStore: def __init__( self, - config: ResponsesStoreConfig | SqlStoreConfig, + reference: ResponsesStoreReference | SqlStoreReference, policy: list[AccessRule], ): - # Handle backward compatibility - if not isinstance(config, ResponsesStoreConfig): - # Legacy: SqlStoreConfig passed directly as config - config = ResponsesStoreConfig( - sql_store_config=config, - ) + if isinstance(reference, ResponsesStoreReference): + self.reference = reference + else: + self.reference = ResponsesStoreReference(**reference.model_dump()) - self.config = config - self.sql_store_config = config.sql_store_config - if not self.sql_store_config: - self.sql_store_config = SqliteSqlStoreConfig( - db_path=(RUNTIME_BASE_DIR / "sqlstore.db").as_posix(), - ) - self.sql_store = None self.policy = policy - - # Disable write queue for SQLite to avoid concurrency issues - self.enable_write_queue = self.sql_store_config.type != SqlStoreType.sqlite + self.sql_store = None + self.enable_write_queue = True # Async write queue and worker control self._queue: ( asyncio.Queue[tuple[OpenAIResponseObject, list[OpenAIResponseInput], list[OpenAIMessageParam]]] | None ) = None self._worker_tasks: list[asyncio.Task[Any]] = [] - self._max_write_queue_size: int = config.max_write_queue_size - self._num_writers: int = max(1, config.num_writers) + self._max_write_queue_size: int = self.reference.max_write_queue_size + self._num_writers: int = max(1, self.reference.num_writers) async def initialize(self): """Create the necessary tables if they don't exist.""" - self.sql_store = AuthorizedSqlStore(sqlstore_impl(self.sql_store_config), self.policy) + base_store = sqlstore_impl(self.reference) + self.sql_store = AuthorizedSqlStore(base_store, self.policy) + + backend_config = _SQLSTORE_BACKENDS.get(self.reference.backend) + if backend_config is None: + raise ValueError( + f"Unregistered SQL backend '{self.reference.backend}'. Registered backends: {sorted(_SQLSTORE_BACKENDS)}" + ) + if backend_config.type == StorageBackendType.SQL_SQLITE: + self.enable_write_queue = False await self.sql_store.create_table( "openai_responses", { diff --git a/llama_stack/providers/utils/sqlstore/authorized_sqlstore.py b/llama_stack/providers/utils/sqlstore/authorized_sqlstore.py index e1da4db6e..3dfc82677 100644 --- a/llama_stack/providers/utils/sqlstore/authorized_sqlstore.py +++ b/llama_stack/providers/utils/sqlstore/authorized_sqlstore.py @@ -12,10 +12,10 @@ from llama_stack.core.access_control.conditions import ProtectedResource from llama_stack.core.access_control.datatypes import AccessRule, Action, Scope from llama_stack.core.datatypes import User from llama_stack.core.request_headers import get_authenticated_user +from llama_stack.core.storage.datatypes import StorageBackendType from llama_stack.log import get_logger from .api import ColumnDefinition, ColumnType, PaginatedResponse, SqlStore -from .sqlstore import SqlStoreType logger = get_logger(name=__name__, category="providers::utils") @@ -82,8 +82,8 @@ class AuthorizedSqlStore: if not hasattr(self.sql_store, "config"): raise ValueError("SqlStore must have a config attribute to be used with AuthorizedSqlStore") - self.database_type = self.sql_store.config.type - if self.database_type not in [SqlStoreType.postgres, SqlStoreType.sqlite]: + self.database_type = self.sql_store.config.type.value + if self.database_type not in [StorageBackendType.SQL_POSTGRES.value, StorageBackendType.SQL_SQLITE.value]: raise ValueError(f"Unsupported database type: {self.database_type}") def _validate_sql_optimized_policy(self) -> None: @@ -220,9 +220,9 @@ class AuthorizedSqlStore: Returns: SQL expression to extract JSON value """ - if self.database_type == SqlStoreType.postgres: + if self.database_type == StorageBackendType.SQL_POSTGRES.value: return f"{column}->'{path}'" - elif self.database_type == SqlStoreType.sqlite: + elif self.database_type == StorageBackendType.SQL_SQLITE.value: return f"JSON_EXTRACT({column}, '$.{path}')" else: raise ValueError(f"Unsupported database type: {self.database_type}") @@ -237,9 +237,9 @@ class AuthorizedSqlStore: Returns: SQL expression to extract JSON value as text """ - if self.database_type == SqlStoreType.postgres: + if self.database_type == StorageBackendType.SQL_POSTGRES.value: return f"{column}->>'{path}'" - elif self.database_type == SqlStoreType.sqlite: + elif self.database_type == StorageBackendType.SQL_SQLITE.value: return f"JSON_EXTRACT({column}, '$.{path}')" else: raise ValueError(f"Unsupported database type: {self.database_type}") @@ -248,10 +248,10 @@ class AuthorizedSqlStore: """Get the SQL conditions for public access.""" # Public records are records that have no owner_principal or access_attributes conditions = ["owner_principal = ''"] - if self.database_type == SqlStoreType.postgres: + if self.database_type == StorageBackendType.SQL_POSTGRES.value: # Postgres stores JSON null as 'null' conditions.append("access_attributes::text = 'null'") - elif self.database_type == SqlStoreType.sqlite: + elif self.database_type == StorageBackendType.SQL_SQLITE.value: conditions.append("access_attributes = 'null'") else: raise ValueError(f"Unsupported database type: {self.database_type}") diff --git a/llama_stack/providers/utils/sqlstore/sqlalchemy_sqlstore.py b/llama_stack/providers/utils/sqlstore/sqlalchemy_sqlstore.py index 23cd6444e..c1ccd73dd 100644 --- a/llama_stack/providers/utils/sqlstore/sqlalchemy_sqlstore.py +++ b/llama_stack/providers/utils/sqlstore/sqlalchemy_sqlstore.py @@ -26,10 +26,10 @@ from sqlalchemy.ext.asyncio.engine import AsyncEngine from sqlalchemy.sql.elements import ColumnElement from llama_stack.apis.common.responses import PaginatedResponse +from llama_stack.core.storage.datatypes import SqlAlchemySqlStoreConfig from llama_stack.log import get_logger from .api import ColumnDefinition, ColumnType, SqlStore -from .sqlstore import SqlAlchemySqlStoreConfig logger = get_logger(name=__name__, category="providers::utils") diff --git a/llama_stack/providers/utils/sqlstore/sqlstore.py b/llama_stack/providers/utils/sqlstore/sqlstore.py index fc44402ae..31801c4ca 100644 --- a/llama_stack/providers/utils/sqlstore/sqlstore.py +++ b/llama_stack/providers/utils/sqlstore/sqlstore.py @@ -4,90 +4,28 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from abc import abstractmethod -from enum import StrEnum -from pathlib import Path -from typing import Annotated, Literal +from typing import Annotated, cast -from pydantic import BaseModel, Field +from pydantic import Field -from llama_stack.core.utils.config_dirs import RUNTIME_BASE_DIR +from llama_stack.core.storage.datatypes import ( + PostgresSqlStoreConfig, + SqliteSqlStoreConfig, + SqlStoreReference, + StorageBackendConfig, + StorageBackendType, +) from .api import SqlStore sql_store_pip_packages = ["sqlalchemy[asyncio]", "aiosqlite", "asyncpg"] - -class SqlStoreType(StrEnum): - sqlite = "sqlite" - postgres = "postgres" - - -class SqlAlchemySqlStoreConfig(BaseModel): - @property - @abstractmethod - def engine_str(self) -> str: ... - - # TODO: move this when we have a better way to specify dependencies with internal APIs - @classmethod - def pip_packages(cls) -> list[str]: - return ["sqlalchemy[asyncio]"] - - -class SqliteSqlStoreConfig(SqlAlchemySqlStoreConfig): - type: Literal[SqlStoreType.sqlite] = SqlStoreType.sqlite - db_path: str = Field( - default=(RUNTIME_BASE_DIR / "sqlstore.db").as_posix(), - description="Database path, e.g. ~/.llama/distributions/ollama/sqlstore.db", - ) - - @property - def engine_str(self) -> str: - return "sqlite+aiosqlite:///" + Path(self.db_path).expanduser().as_posix() - - @classmethod - def sample_run_config(cls, __distro_dir__: str, db_name: str = "sqlstore.db"): - return { - "type": "sqlite", - "db_path": "${env.SQLITE_STORE_DIR:=" + __distro_dir__ + "}/" + db_name, - } - - @classmethod - def pip_packages(cls) -> list[str]: - return super().pip_packages() + ["aiosqlite"] - - -class PostgresSqlStoreConfig(SqlAlchemySqlStoreConfig): - type: Literal[SqlStoreType.postgres] = SqlStoreType.postgres - host: str = "localhost" - port: int = 5432 - db: str = "llamastack" - user: str - password: str | None = None - - @property - def engine_str(self) -> str: - return f"postgresql+asyncpg://{self.user}:{self.password}@{self.host}:{self.port}/{self.db}" - - @classmethod - def pip_packages(cls) -> list[str]: - return super().pip_packages() + ["asyncpg"] - - @classmethod - def sample_run_config(cls, **kwargs): - return { - "type": "postgres", - "host": "${env.POSTGRES_HOST:=localhost}", - "port": "${env.POSTGRES_PORT:=5432}", - "db": "${env.POSTGRES_DB:=llamastack}", - "user": "${env.POSTGRES_USER:=llamastack}", - "password": "${env.POSTGRES_PASSWORD:=llamastack}", - } +_SQLSTORE_BACKENDS: dict[str, StorageBackendConfig] = {} SqlStoreConfig = Annotated[ SqliteSqlStoreConfig | PostgresSqlStoreConfig, - Field(discriminator="type", default=SqlStoreType.sqlite.value), + Field(discriminator="type"), ] @@ -95,9 +33,9 @@ def get_pip_packages(store_config: dict | SqlStoreConfig) -> list[str]: """Get pip packages for SQL store config, handling both dict and object cases.""" if isinstance(store_config, dict): store_type = store_config.get("type") - if store_type == "sqlite": + if store_type == StorageBackendType.SQL_SQLITE.value: return SqliteSqlStoreConfig.pip_packages() - elif store_type == "postgres": + elif store_type == StorageBackendType.SQL_POSTGRES.value: return PostgresSqlStoreConfig.pip_packages() else: raise ValueError(f"Unknown SQL store type: {store_type}") @@ -105,12 +43,28 @@ def get_pip_packages(store_config: dict | SqlStoreConfig) -> list[str]: return store_config.pip_packages() -def sqlstore_impl(config: SqlStoreConfig) -> SqlStore: - if config.type in [SqlStoreType.sqlite, SqlStoreType.postgres]: +def sqlstore_impl(reference: SqlStoreReference) -> SqlStore: + backend_name = reference.backend + + backend_config = _SQLSTORE_BACKENDS.get(backend_name) + if backend_config is None: + raise ValueError( + f"Unknown SQL store backend '{backend_name}'. Registered backends: {sorted(_SQLSTORE_BACKENDS)}" + ) + + if isinstance(backend_config, SqliteSqlStoreConfig | PostgresSqlStoreConfig): from .sqlalchemy_sqlstore import SqlAlchemySqlStoreImpl - impl = SqlAlchemySqlStoreImpl(config) + config = cast(SqliteSqlStoreConfig | PostgresSqlStoreConfig, backend_config).model_copy() + return SqlAlchemySqlStoreImpl(config) else: - raise ValueError(f"Unknown sqlstore type {config.type}") + raise ValueError(f"Unknown sqlstore type {backend_config.type}") - return impl + +def register_sqlstore_backends(backends: dict[str, StorageBackendConfig]) -> None: + """Register the set of available SQL store backends for reference resolution.""" + global _SQLSTORE_BACKENDS + + _SQLSTORE_BACKENDS.clear() + for name, cfg in backends.items(): + _SQLSTORE_BACKENDS[name] = cfg diff --git a/scripts/docker.sh b/scripts/docker.sh index 1ba1d9adf..7a5c3e6e0 100755 --- a/scripts/docker.sh +++ b/scripts/docker.sh @@ -236,7 +236,7 @@ start_container() { echo "=== Starting Docker Container ===" # Get the repo root for volume mount - SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) + SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" && pwd) REPO_ROOT=$(cd "$SCRIPT_DIR/.." && pwd) # Determine the actual image name (may have localhost/ prefix) diff --git a/tests/external/run-byoa.yaml b/tests/external/run-byoa.yaml index 5774ae9da..4d63046c6 100644 --- a/tests/external/run-byoa.yaml +++ b/tests/external/run-byoa.yaml @@ -7,6 +7,24 @@ providers: - provider_id: kaze provider_type: remote::kaze config: {} +storage: + backends: + kv_default: + type: kv_sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/external}/kvstore.db + sql_default: + type: sql_sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/external}/sql_store.db + stores: + metadata: + namespace: registry + backend: kv_default + inference: + table_name: inference_store + backend: sql_default + conversations: + table_name: openai_conversations + backend: sql_default external_apis_dir: ~/.llama/apis.d external_providers_dir: ~/.llama/providers.d server: diff --git a/tests/integration/agents/recordings/00f8a71ccb939737ed72a289eede62998c6882727519858bbd5954307d10a673.json b/tests/integration/agents/recordings/00f8a71ccb939737ed72a289eede62998c6882727519858bbd5954307d10a673.json index 4d4331740..067b7d254 100644 --- a/tests/integration/agents/recordings/00f8a71ccb939737ed72a289eede62998c6882727519858bbd5954307d10a673.json +++ b/tests/integration/agents/recordings/00f8a71ccb939737ed72a289eede62998c6882727519858bbd5954307d10a673.json @@ -548,5 +548,6 @@ } ], "is_streaming": true - } + }, + "id_normalization_mapping": {} } diff --git a/tests/integration/agents/recordings/0940d1521204120ff9687b8ad6bf54c271f879db8b5a6ce62848b86a43bc49e4.json b/tests/integration/agents/recordings/0940d1521204120ff9687b8ad6bf54c271f879db8b5a6ce62848b86a43bc49e4.json new file mode 100644 index 000000000..aa61b7dbe --- /dev/null +++ b/tests/integration/agents/recordings/0940d1521204120ff9687b8ad6bf54c271f879db8b5a6ce62848b86a43bc49e4.json @@ -0,0 +1,447 @@ +{ + "test_id": "tests/integration/agents/test_agents.py::test_tool_choice_get_boiling_point[ollama/llama3.2:3b-instruct-fp16]", + "request": { + "method": "POST", + "url": "http://0.0.0.0:11434/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "llama3.2:3b-instruct-fp16", + "messages": [ + { + "role": "system", + "content": "You are a helpful assistant" + }, + { + "role": "user", + "content": "What is the boiling point of the liquid polyjuice in celsius?" + }, + { + "role": "assistant", + "content": "", + "tool_calls": [ + { + "id": "call_5qverjg6", + "type": "function", + "function": { + "name": "get_boiling_point", + "arguments": "{\"celcius\":true,\"liquid_name\":\"polyjuice\"}" + } + } + ] + }, + { + "role": "tool", + "tool_call_id": "call_5qverjg6", + "content": "-100" + } + ], + "max_tokens": 512, + "stream": true, + "temperature": 0.0001, + "tool_choice": { + "type": "function", + "function": { + "name": "get_boiling_point" + } + }, + "tools": [ + { + "type": "function", + "function": { + "name": "get_boiling_point", + "description": "Returns the boiling point of a liquid in Celcius or Fahrenheit.", + "parameters": { + "type": "object", + "properties": { + "liquid_name": { + "type": "string", + "description": "The name of the liquid" + }, + "celcius": { + "type": "boolean", + "description": "Whether to return the boiling point in Celcius" + } + }, + "required": [ + "liquid_name" + ] + } + } + } + ], + "top_p": 0.9 + }, + "endpoint": "/v1/chat/completions", + "model": "llama3.2:3b-instruct-fp16" + }, + "response": { + "body": [ + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0940d1521204", + "choices": [ + { + "delta": { + "content": "The", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0940d1521204", + "choices": [ + { + "delta": { + "content": " boiling", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0940d1521204", + "choices": [ + { + "delta": { + "content": " point", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0940d1521204", + "choices": [ + { + "delta": { + "content": " of", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0940d1521204", + "choices": [ + { + "delta": { + "content": " liquid", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0940d1521204", + "choices": [ + { + "delta": { + "content": " poly", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0940d1521204", + "choices": [ + { + "delta": { + "content": "ju", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0940d1521204", + "choices": [ + { + "delta": { + "content": "ice", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0940d1521204", + "choices": [ + { + "delta": { + "content": " is", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0940d1521204", + "choices": [ + { + "delta": { + "content": " -", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0940d1521204", + "choices": [ + { + "delta": { + "content": "100", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0940d1521204", + "choices": [ + { + "delta": { + "content": "\u00b0C", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0940d1521204", + "choices": [ + { + "delta": { + "content": ".", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0940d1521204", + "choices": [ + { + "delta": { + "content": "", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": "stop", + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + } + ], + "is_streaming": true + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/agents/recordings/0f5443c07d1568fd139b8f3ea0aaa3de23d22b30f353c8ed7e6cfd033d904e04.json b/tests/integration/agents/recordings/0f5443c07d1568fd139b8f3ea0aaa3de23d22b30f353c8ed7e6cfd033d904e04.json new file mode 100644 index 000000000..3cf297c34 --- /dev/null +++ b/tests/integration/agents/recordings/0f5443c07d1568fd139b8f3ea0aaa3de23d22b30f353c8ed7e6cfd033d904e04.json @@ -0,0 +1,888 @@ +{ + "test_id": "tests/integration/agents/test_openai_responses.py::test_response_with_instructions[txt=ollama/llama3.2:3b-instruct-fp16]", + "request": { + "method": "POST", + "url": "http://0.0.0.0:11434/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "llama3.2:3b-instruct-fp16", + "messages": [ + { + "role": "system", + "content": "You are a helpful assistant and speak in pirate language." + }, + { + "role": "user", + "content": "What is the capital of France?" + }, + { + "role": "assistant", + "content": "The capital of France is Paris." + } + ], + "stream": true, + "stream_options": { + "include_usage": true + } + }, + "endpoint": "/v1/chat/completions", + "model": "llama3.2:3b-instruct-fp16" + }, + "response": { + "body": [ + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0f5443c07d15", + "choices": [ + { + "delta": { + "content": " Yer", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0f5443c07d15", + "choices": [ + { + "delta": { + "content": " look", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0f5443c07d15", + "choices": [ + { + "delta": { + "content": "in", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0f5443c07d15", + "choices": [ + { + "delta": { + "content": "'", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0f5443c07d15", + "choices": [ + { + "delta": { + "content": " fer", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0f5443c07d15", + "choices": [ + { + "delta": { + "content": " a", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0f5443c07d15", + "choices": [ + { + "delta": { + "content": " port", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0f5443c07d15", + "choices": [ + { + "delta": { + "content": " o", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0f5443c07d15", + "choices": [ + { + "delta": { + "content": "'", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0f5443c07d15", + "choices": [ + { + "delta": { + "content": " call", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0f5443c07d15", + "choices": [ + { + "delta": { + "content": ",", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0f5443c07d15", + "choices": [ + { + "delta": { + "content": " eh", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0f5443c07d15", + "choices": [ + { + "delta": { + "content": "?", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0f5443c07d15", + "choices": [ + { + "delta": { + "content": " That", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0f5443c07d15", + "choices": [ + { + "delta": { + "content": " be", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0f5443c07d15", + "choices": [ + { + "delta": { + "content": " the", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0f5443c07d15", + "choices": [ + { + "delta": { + "content": " one", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0f5443c07d15", + "choices": [ + { + "delta": { + "content": "!", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0f5443c07d15", + "choices": [ + { + "delta": { + "content": " Yer", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0f5443c07d15", + "choices": [ + { + "delta": { + "content": " won", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0f5443c07d15", + "choices": [ + { + "delta": { + "content": "'t", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0f5443c07d15", + "choices": [ + { + "delta": { + "content": " go", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0f5443c07d15", + "choices": [ + { + "delta": { + "content": " astr", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0f5443c07d15", + "choices": [ + { + "delta": { + "content": "ay", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0f5443c07d15", + "choices": [ + { + "delta": { + "content": " with", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0f5443c07d15", + "choices": [ + { + "delta": { + "content": " that", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0f5443c07d15", + "choices": [ + { + "delta": { + "content": " answer", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0f5443c07d15", + "choices": [ + { + "delta": { + "content": ",", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0f5443c07d15", + "choices": [ + { + "delta": { + "content": " mate", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0f5443c07d15", + "choices": [ + { + "delta": { + "content": "y", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0f5443c07d15", + "choices": [ + { + "delta": { + "content": ".", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0f5443c07d15", + "choices": [ + { + "delta": { + "content": "", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": "stop", + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0f5443c07d15", + "choices": [], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": { + "completion_tokens": 32, + "prompt_tokens": 50, + "total_tokens": 82, + "completion_tokens_details": null, + "prompt_tokens_details": null + } + } + } + ], + "is_streaming": true + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/agents/recordings/13fac3724cd626a119153f60fa56bb54955fe0b10f5c4102b78e2d428b5ffc7a.json b/tests/integration/agents/recordings/13fac3724cd626a119153f60fa56bb54955fe0b10f5c4102b78e2d428b5ffc7a.json index d606edb37..7efea91ba 100644 --- a/tests/integration/agents/recordings/13fac3724cd626a119153f60fa56bb54955fe0b10f5c4102b78e2d428b5ffc7a.json +++ b/tests/integration/agents/recordings/13fac3724cd626a119153f60fa56bb54955fe0b10f5c4102b78e2d428b5ffc7a.json @@ -54,5 +54,6 @@ } }, "is_streaming": false - } + }, + "id_normalization_mapping": {} } diff --git a/tests/integration/agents/recordings/15b23045b5cdfc49228d58e4a082f8402f7c91d15e2240f855cc9b8b4e25352a.json b/tests/integration/agents/recordings/15b23045b5cdfc49228d58e4a082f8402f7c91d15e2240f855cc9b8b4e25352a.json new file mode 100644 index 000000000..b899e0c2d --- /dev/null +++ b/tests/integration/agents/recordings/15b23045b5cdfc49228d58e4a082f8402f7c91d15e2240f855cc9b8b4e25352a.json @@ -0,0 +1,256 @@ +{ + "test_id": "tests/integration/agents/test_openai_responses.py::test_response_with_instructions[txt=ollama/llama3.2:3b-instruct-fp16]", + "request": { + "method": "POST", + "url": "http://0.0.0.0:11434/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "llama3.2:3b-instruct-fp16", + "messages": [ + { + "role": "user", + "content": "What is the capital of France?" + } + ], + "stream": true, + "stream_options": { + "include_usage": true + } + }, + "endpoint": "/v1/chat/completions", + "model": "llama3.2:3b-instruct-fp16" + }, + "response": { + "body": [ + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-15b23045b5cd", + "choices": [ + { + "delta": { + "content": "The", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-15b23045b5cd", + "choices": [ + { + "delta": { + "content": " capital", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-15b23045b5cd", + "choices": [ + { + "delta": { + "content": " of", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-15b23045b5cd", + "choices": [ + { + "delta": { + "content": " France", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-15b23045b5cd", + "choices": [ + { + "delta": { + "content": " is", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-15b23045b5cd", + "choices": [ + { + "delta": { + "content": " Paris", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-15b23045b5cd", + "choices": [ + { + "delta": { + "content": ".", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-15b23045b5cd", + "choices": [ + { + "delta": { + "content": "", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": "stop", + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-15b23045b5cd", + "choices": [], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": { + "completion_tokens": 8, + "prompt_tokens": 32, + "total_tokens": 40, + "completion_tokens_details": null, + "prompt_tokens_details": null + } + } + } + ], + "is_streaming": true + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/agents/recordings/1a0d3109cf92111ed4cb061a857dee0b99fa1e0b27934de1e6c5d29c03026626.json b/tests/integration/agents/recordings/1a0d3109cf92111ed4cb061a857dee0b99fa1e0b27934de1e6c5d29c03026626.json index b8b22f51d..407ac0655 100644 --- a/tests/integration/agents/recordings/1a0d3109cf92111ed4cb061a857dee0b99fa1e0b27934de1e6c5d29c03026626.json +++ b/tests/integration/agents/recordings/1a0d3109cf92111ed4cb061a857dee0b99fa1e0b27934de1e6c5d29c03026626.json @@ -54,5 +54,6 @@ } }, "is_streaming": false - } + }, + "id_normalization_mapping": {} } diff --git a/tests/integration/agents/recordings/1adb6f4621eaa9e5d350925c3fc8c34fbb3d0af4cf4307d4363ff570c260287b.json b/tests/integration/agents/recordings/1adb6f4621eaa9e5d350925c3fc8c34fbb3d0af4cf4307d4363ff570c260287b.json index 4d7a1d1e4..241fb6127 100644 --- a/tests/integration/agents/recordings/1adb6f4621eaa9e5d350925c3fc8c34fbb3d0af4cf4307d4363ff570c260287b.json +++ b/tests/integration/agents/recordings/1adb6f4621eaa9e5d350925c3fc8c34fbb3d0af4cf4307d4363ff570c260287b.json @@ -55,7 +55,7 @@ "choices": [ { "delta": { - "content": "'m", + "content": "'d", "function_call": null, "refusal": null, "role": "assistant", @@ -81,7 +81,7 @@ "choices": [ { "delta": { - "content": " not", + "content": " be", "function_call": null, "refusal": null, "role": "assistant", @@ -107,7 +107,7 @@ "choices": [ { "delta": { - "content": " able", + "content": " happy", "function_call": null, "refusal": null, "role": "assistant", @@ -159,7 +159,7 @@ "choices": [ { "delta": { - "content": " provide", + "content": " help", "function_call": null, "refusal": null, "role": "assistant", @@ -185,7 +185,7 @@ "choices": [ { "delta": { - "content": " real", + "content": " you", "function_call": null, "refusal": null, "role": "assistant", @@ -211,7 +211,59 @@ "choices": [ { "delta": { - "content": "-time", + "content": " with", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " the", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " current", "function_call": null, "refusal": null, "role": "assistant", @@ -282,6 +334,58 @@ "usage": null } }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " for", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " Tokyo", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, { "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": { @@ -393,189 +497,7 @@ "choices": [ { "delta": { - "content": " can", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-1adb6f4621ea", - "choices": [ - { - "delta": { - "content": " tell", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-1adb6f4621ea", - "choices": [ - { - "delta": { - "content": " you", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-1adb6f4621ea", - "choices": [ - { - "delta": { - "content": " that", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-1adb6f4621ea", - "choices": [ - { - "delta": { - "content": " Tokyo", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-1adb6f4621ea", - "choices": [ - { - "delta": { - "content": ",", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-1adb6f4621ea", - "choices": [ - { - "delta": { - "content": " Japan", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-1adb6f4621ea", - "choices": [ - { - "delta": { - "content": " has", + "content": "'m", "function_call": null, "refusal": null, "role": "assistant", @@ -627,7 +549,7 @@ "choices": [ { "delta": { - "content": " humid", + "content": " large", "function_call": null, "refusal": null, "role": "assistant", @@ -653,7 +575,7 @@ "choices": [ { "delta": { - "content": " subt", + "content": " language", "function_call": null, "refusal": null, "role": "assistant", @@ -679,7 +601,7 @@ "choices": [ { "delta": { - "content": "ropical", + "content": " model", "function_call": null, "refusal": null, "role": "assistant", @@ -705,7 +627,7 @@ "choices": [ { "delta": { - "content": " climate", + "content": ",", "function_call": null, "refusal": null, "role": "assistant", @@ -731,7 +653,7 @@ "choices": [ { "delta": { - "content": " with", + "content": " I", "function_call": null, "refusal": null, "role": "assistant", @@ -757,7 +679,7 @@ "choices": [ { "delta": { - "content": " hot", + "content": " don", "function_call": null, "refusal": null, "role": "assistant", @@ -783,7 +705,7 @@ "choices": [ { "delta": { - "content": " summers", + "content": "'t", "function_call": null, "refusal": null, "role": "assistant", @@ -809,7 +731,7 @@ "choices": [ { "delta": { - "content": " and", + "content": " have", "function_call": null, "refusal": null, "role": "assistant", @@ -835,7 +757,7 @@ "choices": [ { "delta": { - "content": " cold", + "content": " real", "function_call": null, "refusal": null, "role": "assistant", @@ -861,7 +783,7 @@ "choices": [ { "delta": { - "content": " winters", + "content": "-time", "function_call": null, "refusal": null, "role": "assistant", @@ -887,111 +809,7 @@ "choices": [ { "delta": { - "content": ".\n\n", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-1adb6f4621ea", - "choices": [ - { - "delta": { - "content": "If", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-1adb6f4621ea", - "choices": [ - { - "delta": { - "content": " you", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-1adb6f4621ea", - "choices": [ - { - "delta": { - "content": "'d", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-1adb6f4621ea", - "choices": [ - { - "delta": { - "content": " like", + "content": " access", "function_call": null, "refusal": null, "role": "assistant", @@ -1043,7 +861,449 @@ "choices": [ { "delta": { - "content": " know", + "content": " current", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " weather", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " conditions", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": ".\n\n", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": "That", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " being", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " said", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": ",", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " I", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " can", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " suggest", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " some", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " ways", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " for", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " you", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " to", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " find", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " out", "function_call": null, "refusal": null, "role": "assistant", @@ -1147,59 +1407,7 @@ "choices": [ { "delta": { - "content": " or", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-1adb6f4621ea", - "choices": [ - { - "delta": { - "content": " forecast", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-1adb6f4621ea", - "choices": [ - { - "delta": { - "content": " for", + "content": " in", "function_call": null, "refusal": null, "role": "assistant", @@ -1251,7 +1459,7 @@ "choices": [ { "delta": { - "content": ",", + "content": ":\n\n", "function_call": null, "refusal": null, "role": "assistant", @@ -1277,7 +1485,7 @@ "choices": [ { "delta": { - "content": " I", + "content": "1", "function_call": null, "refusal": null, "role": "assistant", @@ -1303,7 +1511,7 @@ "choices": [ { "delta": { - "content": " recommend", + "content": ".", "function_call": null, "refusal": null, "role": "assistant", @@ -1329,59 +1537,7 @@ "choices": [ { "delta": { - "content": " checking", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-1adb6f4621ea", - "choices": [ - { - "delta": { - "content": " a", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-1adb6f4621ea", - "choices": [ - { - "delta": { - "content": " reliable", + "content": " Check", "function_call": null, "refusal": null, "role": "assistant", @@ -1433,7 +1589,7 @@ "choices": [ { "delta": { - "content": " source", + "content": " weather", "function_call": null, "refusal": null, "role": "assistant", @@ -1459,7 +1615,7 @@ "choices": [ { "delta": { - "content": " such", + "content": " websites", "function_call": null, "refusal": null, "role": "assistant", @@ -1485,7 +1641,7 @@ "choices": [ { "delta": { - "content": " as", + "content": ":", "function_call": null, "refusal": null, "role": "assistant", @@ -1511,7 +1667,7 @@ "choices": [ { "delta": { - "content": ":\n\n", + "content": " You", "function_call": null, "refusal": null, "role": "assistant", @@ -1537,7 +1693,7 @@ "choices": [ { "delta": { - "content": "*", + "content": " can", "function_call": null, "refusal": null, "role": "assistant", @@ -1563,7 +1719,267 @@ "choices": [ { "delta": { - "content": " The", + "content": " check", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " websites", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " like", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " Acc", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": "u", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": "Weather", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": ",", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " Weather", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": ".com", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": ",", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " or", "function_call": null, "refusal": null, "role": "assistant", @@ -1797,7 +2213,397 @@ "choices": [ { "delta": { - "content": " website", + "content": " for", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " the", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " current", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " weather", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " condition", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " and", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " forecast", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " for", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " Tokyo", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": ".\n", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": "2", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": ".", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " Use", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " a", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " mobile", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " app", "function_call": null, "refusal": null, "role": "assistant", @@ -1849,7 +2655,7 @@ "choices": [ { "delta": { - "content": " \n", + "content": " provide", "function_call": null, "refusal": null, "role": "assistant", @@ -2057,7 +2863,7 @@ "choices": [ { "delta": { - "content": "*", + "content": " real", "function_call": null, "refusal": null, "role": "assistant", @@ -2083,7 +2889,7 @@ "choices": [ { "delta": { - "content": " Acc", + "content": "-time", "function_call": null, "refusal": null, "role": "assistant", @@ -2109,7 +2915,7 @@ "choices": [ { "delta": { - "content": "u", + "content": " weather", "function_call": null, "refusal": null, "role": "assistant", @@ -2135,7 +2941,7 @@ "choices": [ { "delta": { - "content": "Weather", + "content": " information", "function_call": null, "refusal": null, "role": "assistant", @@ -2161,7 +2967,7 @@ "choices": [ { "delta": { - "content": ":", + "content": ",", "function_call": null, "refusal": null, "role": "assistant", @@ -2187,7 +2993,7 @@ "choices": [ { "delta": { - "content": " https", + "content": " such", "function_call": null, "refusal": null, "role": "assistant", @@ -2213,189 +3019,7 @@ "choices": [ { "delta": { - "content": "://", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-1adb6f4621ea", - "choices": [ - { - "delta": { - "content": "www", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-1adb6f4621ea", - "choices": [ - { - "delta": { - "content": ".acc", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-1adb6f4621ea", - "choices": [ - { - "delta": { - "content": "u", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-1adb6f4621ea", - "choices": [ - { - "delta": { - "content": "weather", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-1adb6f4621ea", - "choices": [ - { - "delta": { - "content": ".com", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-1adb6f4621ea", - "choices": [ - { - "delta": { - "content": "/\n", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-1adb6f4621ea", - "choices": [ - { - "delta": { - "content": "*", + "content": " as", "function_call": null, "refusal": null, "role": "assistant", @@ -2466,6 +3090,240 @@ "usage": null } }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " or", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " Weather", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " Underground", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": ".\n", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": "3", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": ".", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " Check", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " social", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " media", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, { "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": { @@ -2499,7 +3357,7 @@ "choices": [ { "delta": { - "content": " https", + "content": " You", "function_call": null, "refusal": null, "role": "assistant", @@ -2525,7 +3383,7 @@ "choices": [ { "delta": { - "content": "://", + "content": " can", "function_call": null, "refusal": null, "role": "assistant", @@ -2551,7 +3409,7 @@ "choices": [ { "delta": { - "content": "dark", + "content": " also", "function_call": null, "refusal": null, "role": "assistant", @@ -2577,7 +3435,7 @@ "choices": [ { "delta": { - "content": "sky", + "content": " check", "function_call": null, "refusal": null, "role": "assistant", @@ -2603,7 +3461,7 @@ "choices": [ { "delta": { - "content": ".net", + "content": " social", "function_call": null, "refusal": null, "role": "assistant", @@ -2629,7 +3487,7 @@ "choices": [ { "delta": { - "content": "/\n\n", + "content": " media", "function_call": null, "refusal": null, "role": "assistant", @@ -2655,7 +3513,7 @@ "choices": [ { "delta": { - "content": "Please", + "content": " platforms", "function_call": null, "refusal": null, "role": "assistant", @@ -2681,7 +3539,215 @@ "choices": [ { "delta": { - "content": " keep", + "content": " like", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " Twitter", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " or", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " Facebook", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " for", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " updates", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " on", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " the", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " weather", "function_call": null, "refusal": null, "role": "assistant", @@ -2733,7 +3799,85 @@ "choices": [ { "delta": { - "content": " mind", + "content": " Tokyo", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": ".\n\n", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": "Please", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " note", "function_call": null, "refusal": null, "role": "assistant", @@ -2778,6 +3922,578 @@ "usage": null } }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " my", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " knowledge", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " cutoff", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " is", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " December", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " ", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": "202", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": "3", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": ",", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " so", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " I", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " may", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " not", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " have", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " the", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " most", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " up", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": "-to", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": "-date", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " information", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " on", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " current", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, { "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": { @@ -2830,526 +4546,6 @@ "usage": null } }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-1adb6f4621ea", - "choices": [ - { - "delta": { - "content": " can", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-1adb6f4621ea", - "choices": [ - { - "delta": { - "content": " change", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-1adb6f4621ea", - "choices": [ - { - "delta": { - "content": " quickly", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-1adb6f4621ea", - "choices": [ - { - "delta": { - "content": ",", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-1adb6f4621ea", - "choices": [ - { - "delta": { - "content": " and", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-1adb6f4621ea", - "choices": [ - { - "delta": { - "content": " it", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-1adb6f4621ea", - "choices": [ - { - "delta": { - "content": "'s", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-1adb6f4621ea", - "choices": [ - { - "delta": { - "content": " always", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-1adb6f4621ea", - "choices": [ - { - "delta": { - "content": " a", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-1adb6f4621ea", - "choices": [ - { - "delta": { - "content": " good", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-1adb6f4621ea", - "choices": [ - { - "delta": { - "content": " idea", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-1adb6f4621ea", - "choices": [ - { - "delta": { - "content": " to", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-1adb6f4621ea", - "choices": [ - { - "delta": { - "content": " check", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-1adb6f4621ea", - "choices": [ - { - "delta": { - "content": " the", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-1adb6f4621ea", - "choices": [ - { - "delta": { - "content": " latest", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-1adb6f4621ea", - "choices": [ - { - "delta": { - "content": " forecast", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-1adb6f4621ea", - "choices": [ - { - "delta": { - "content": " before", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-1adb6f4621ea", - "choices": [ - { - "delta": { - "content": " planning", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-1adb6f4621ea", - "choices": [ - { - "delta": { - "content": " your", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-1adb6f4621ea", - "choices": [ - { - "delta": { - "content": " activities", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, { "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": { @@ -3413,9 +4609,9 @@ "service_tier": null, "system_fingerprint": "fp_ollama", "usage": { - "completion_tokens": 131, + "completion_tokens": 176, "prompt_tokens": 32, - "total_tokens": 163, + "total_tokens": 208, "completion_tokens_details": null, "prompt_tokens_details": null } diff --git a/tests/integration/agents/recordings/1f0aef7475448c77021b4e321125b4df3aadc1637a93358a85c5ec2de8338332.json b/tests/integration/agents/recordings/1f0aef7475448c77021b4e321125b4df3aadc1637a93358a85c5ec2de8338332.json new file mode 100644 index 000000000..4c0fa6cce --- /dev/null +++ b/tests/integration/agents/recordings/1f0aef7475448c77021b4e321125b4df3aadc1637a93358a85c5ec2de8338332.json @@ -0,0 +1,442 @@ +{ + "test_id": "tests/integration/agents/test_agents.py::test_create_turn_response[ollama/llama3.2:3b-instruct-fp16-client_tools1]", + "request": { + "method": "POST", + "url": "http://0.0.0.0:11434/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "llama3.2:3b-instruct-fp16", + "messages": [ + { + "role": "system", + "content": "You are a helpful assistant" + }, + { + "role": "user", + "content": "Call get_boiling_point_with_metadata tool and answer What is the boiling point of polyjuice?" + }, + { + "role": "assistant", + "content": "", + "tool_calls": [ + { + "id": "call_klhbln13", + "type": "function", + "function": { + "name": "get_boiling_point_with_metadata", + "arguments": "{\"celcius\":false,\"liquid_name\":\"polyjuice\"}" + } + } + ] + }, + { + "role": "tool", + "tool_call_id": "call_klhbln13", + "content": "-212" + } + ], + "max_tokens": 512, + "stream": true, + "temperature": 0.0001, + "tool_choice": "auto", + "tools": [ + { + "type": "function", + "function": { + "name": "get_boiling_point_with_metadata", + "description": "Returns the boiling point of a liquid in Celcius or Fahrenheit", + "parameters": { + "type": "object", + "properties": { + "liquid_name": { + "type": "string", + "description": "The name of the liquid" + }, + "celcius": { + "type": "boolean", + "description": "Whether to return the boiling point in Celcius" + } + }, + "required": [ + "liquid_name" + ] + } + } + } + ], + "top_p": 0.9 + }, + "endpoint": "/v1/chat/completions", + "model": "llama3.2:3b-instruct-fp16" + }, + "response": { + "body": [ + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1f0aef747544", + "choices": [ + { + "delta": { + "content": "The", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1f0aef747544", + "choices": [ + { + "delta": { + "content": " boiling", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1f0aef747544", + "choices": [ + { + "delta": { + "content": " point", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1f0aef747544", + "choices": [ + { + "delta": { + "content": " of", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1f0aef747544", + "choices": [ + { + "delta": { + "content": " poly", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1f0aef747544", + "choices": [ + { + "delta": { + "content": "ju", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1f0aef747544", + "choices": [ + { + "delta": { + "content": "ice", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1f0aef747544", + "choices": [ + { + "delta": { + "content": " is", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1f0aef747544", + "choices": [ + { + "delta": { + "content": " -", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1f0aef747544", + "choices": [ + { + "delta": { + "content": "212", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1f0aef747544", + "choices": [ + { + "delta": { + "content": " degrees", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1f0aef747544", + "choices": [ + { + "delta": { + "content": " Celsius", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1f0aef747544", + "choices": [ + { + "delta": { + "content": ".", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1f0aef747544", + "choices": [ + { + "delta": { + "content": "", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": "stop", + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + } + ], + "is_streaming": true + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/agents/recordings/2172059863d4d17e7525483102a6753617b60a8c33ece637db18061d23086536.json b/tests/integration/agents/recordings/2172059863d4d17e7525483102a6753617b60a8c33ece637db18061d23086536.json index 992648658..9f9397057 100644 --- a/tests/integration/agents/recordings/2172059863d4d17e7525483102a6753617b60a8c33ece637db18061d23086536.json +++ b/tests/integration/agents/recordings/2172059863d4d17e7525483102a6753617b60a8c33ece637db18061d23086536.json @@ -56,7 +56,7 @@ "tool_calls": [ { "index": 0, - "id": "call_os3xa9go", + "id": "call_6nqo069h", "function": { "arguments": "{\"city\":\"Tokyo\"}", "name": "get_weather" @@ -115,9 +115,9 @@ "service_tier": null, "system_fingerprint": "fp_ollama", "usage": { - "completion_tokens": 15, + "completion_tokens": 18, "prompt_tokens": 179, - "total_tokens": 194, + "total_tokens": 197, "completion_tokens_details": null, "prompt_tokens_details": null } diff --git a/tests/integration/agents/recordings/256d8571909664fc6c925058b2ff1b1d0e0bd618975cbf4752eb31ada6d2482b.json b/tests/integration/agents/recordings/256d8571909664fc6c925058b2ff1b1d0e0bd618975cbf4752eb31ada6d2482b.json new file mode 100644 index 000000000..21d5a0663 --- /dev/null +++ b/tests/integration/agents/recordings/256d8571909664fc6c925058b2ff1b1d0e0bd618975cbf4752eb31ada6d2482b.json @@ -0,0 +1,416 @@ +{ + "test_id": "tests/integration/agents/test_agents.py::test_custom_tool_infinite_loop[ollama/llama3.2:3b-instruct-fp16]", + "request": { + "method": "POST", + "url": "http://0.0.0.0:11434/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "llama3.2:3b-instruct-fp16", + "messages": [ + { + "role": "system", + "content": "You are a helpful assistant Always respond with tool calls no matter what. " + }, + { + "role": "user", + "content": "Get the boiling point of polyjuice with a tool call." + }, + { + "role": "assistant", + "content": "", + "tool_calls": [ + { + "id": "call_9x4z21g1", + "type": "function", + "function": { + "name": "get_boiling_point", + "arguments": "{\"celcius\":\"true\",\"liquid_name\":\"polyjuice\"}" + } + } + ] + }, + { + "role": "tool", + "tool_call_id": "call_9x4z21g1", + "content": "-100" + } + ], + "max_tokens": 512, + "stream": true, + "temperature": 0.0001, + "tool_choice": "auto", + "tools": [ + { + "type": "function", + "function": { + "name": "get_boiling_point", + "description": "Returns the boiling point of a liquid in Celcius or Fahrenheit.", + "parameters": { + "type": "object", + "properties": { + "liquid_name": { + "type": "string", + "description": "The name of the liquid" + }, + "celcius": { + "type": "boolean", + "description": "Whether to return the boiling point in Celcius" + } + }, + "required": [ + "liquid_name" + ] + } + } + } + ], + "top_p": 0.9 + }, + "endpoint": "/v1/chat/completions", + "model": "llama3.2:3b-instruct-fp16" + }, + "response": { + "body": [ + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-256d85719096", + "choices": [ + { + "delta": { + "content": "The", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-256d85719096", + "choices": [ + { + "delta": { + "content": " boiling", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-256d85719096", + "choices": [ + { + "delta": { + "content": " point", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-256d85719096", + "choices": [ + { + "delta": { + "content": " of", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-256d85719096", + "choices": [ + { + "delta": { + "content": " Poly", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-256d85719096", + "choices": [ + { + "delta": { + "content": "ju", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-256d85719096", + "choices": [ + { + "delta": { + "content": "ice", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-256d85719096", + "choices": [ + { + "delta": { + "content": " is", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-256d85719096", + "choices": [ + { + "delta": { + "content": " -", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-256d85719096", + "choices": [ + { + "delta": { + "content": "100", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-256d85719096", + "choices": [ + { + "delta": { + "content": "\u00b0C", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-256d85719096", + "choices": [ + { + "delta": { + "content": ".", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-256d85719096", + "choices": [ + { + "delta": { + "content": "", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": "stop", + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + } + ], + "is_streaming": true + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/agents/recordings/292308724331c7172aaf91fe1373f2fbc9d626af08314bd7f5ba69d038ea7c1b.json b/tests/integration/agents/recordings/292308724331c7172aaf91fe1373f2fbc9d626af08314bd7f5ba69d038ea7c1b.json index a94c52c72..9a1781046 100644 --- a/tests/integration/agents/recordings/292308724331c7172aaf91fe1373f2fbc9d626af08314bd7f5ba69d038ea7c1b.json +++ b/tests/integration/agents/recordings/292308724331c7172aaf91fe1373f2fbc9d626af08314bd7f5ba69d038ea7c1b.json @@ -54,5 +54,6 @@ } }, "is_streaming": false - } + }, + "id_normalization_mapping": {} } diff --git a/tests/integration/agents/recordings/36e22908b34c0835037ba7b52477c5db69585e66f4fde18eaa8bfd4bb4e3d783.json b/tests/integration/agents/recordings/36e22908b34c0835037ba7b52477c5db69585e66f4fde18eaa8bfd4bb4e3d783.json index 3699fbc8b..3a1f57ee8 100644 --- a/tests/integration/agents/recordings/36e22908b34c0835037ba7b52477c5db69585e66f4fde18eaa8bfd4bb4e3d783.json +++ b/tests/integration/agents/recordings/36e22908b34c0835037ba7b52477c5db69585e66f4fde18eaa8bfd4bb4e3d783.json @@ -66,7 +66,7 @@ "tool_calls": [ { "index": 0, - "id": "call_ixvkq8fh", + "id": "call_icfpgg5q", "function": { "arguments": "{\"celcius\":true,\"liquid_name\":\"polyjuice\"}", "name": "get_boiling_point" @@ -116,5 +116,6 @@ } ], "is_streaming": true - } + }, + "id_normalization_mapping": {} } diff --git a/tests/integration/agents/recordings/45175e711385e24f62516c3982eaf8fb6bbab4f37691fadc08812ce223dfc628.json b/tests/integration/agents/recordings/45175e711385e24f62516c3982eaf8fb6bbab4f37691fadc08812ce223dfc628.json index 4f001f5bf..0a27ddb7d 100644 --- a/tests/integration/agents/recordings/45175e711385e24f62516c3982eaf8fb6bbab4f37691fadc08812ce223dfc628.json +++ b/tests/integration/agents/recordings/45175e711385e24f62516c3982eaf8fb6bbab4f37691fadc08812ce223dfc628.json @@ -54,5 +54,6 @@ } }, "is_streaming": false - } + }, + "id_normalization_mapping": {} } diff --git a/tests/integration/agents/recordings/45d0aabc502385b4cc23e16706a1c594644b2a1387ac3cee7cd434df25e8f22f.json b/tests/integration/agents/recordings/45d0aabc502385b4cc23e16706a1c594644b2a1387ac3cee7cd434df25e8f22f.json new file mode 100644 index 000000000..bfbbcb87b --- /dev/null +++ b/tests/integration/agents/recordings/45d0aabc502385b4cc23e16706a1c594644b2a1387ac3cee7cd434df25e8f22f.json @@ -0,0 +1,442 @@ +{ + "test_id": "tests/integration/agents/test_agents.py::test_custom_tool[ollama/llama3.2:3b-instruct-fp16]", + "request": { + "method": "POST", + "url": "http://0.0.0.0:11434/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "llama3.2:3b-instruct-fp16", + "messages": [ + { + "role": "system", + "content": "You are a helpful assistant" + }, + { + "role": "user", + "content": "What is the boiling point of the liquid polyjuice in celsius?" + }, + { + "role": "assistant", + "content": "", + "tool_calls": [ + { + "id": "call_icfpgg5q", + "type": "function", + "function": { + "name": "get_boiling_point", + "arguments": "{\"celcius\":true,\"liquid_name\":\"polyjuice\"}" + } + } + ] + }, + { + "role": "tool", + "tool_call_id": "call_icfpgg5q", + "content": "-100" + } + ], + "max_tokens": 512, + "stream": true, + "temperature": 0.0001, + "tool_choice": "auto", + "tools": [ + { + "type": "function", + "function": { + "name": "get_boiling_point", + "description": "Returns the boiling point of a liquid in Celcius or Fahrenheit.", + "parameters": { + "type": "object", + "properties": { + "liquid_name": { + "type": "string", + "description": "The name of the liquid" + }, + "celcius": { + "type": "boolean", + "description": "Whether to return the boiling point in Celcius" + } + }, + "required": [ + "liquid_name" + ] + } + } + } + ], + "top_p": 0.9 + }, + "endpoint": "/v1/chat/completions", + "model": "llama3.2:3b-instruct-fp16" + }, + "response": { + "body": [ + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-45d0aabc5023", + "choices": [ + { + "delta": { + "content": "The", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-45d0aabc5023", + "choices": [ + { + "delta": { + "content": " boiling", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-45d0aabc5023", + "choices": [ + { + "delta": { + "content": " point", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-45d0aabc5023", + "choices": [ + { + "delta": { + "content": " of", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-45d0aabc5023", + "choices": [ + { + "delta": { + "content": " liquid", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-45d0aabc5023", + "choices": [ + { + "delta": { + "content": " poly", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-45d0aabc5023", + "choices": [ + { + "delta": { + "content": "ju", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-45d0aabc5023", + "choices": [ + { + "delta": { + "content": "ice", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-45d0aabc5023", + "choices": [ + { + "delta": { + "content": " is", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-45d0aabc5023", + "choices": [ + { + "delta": { + "content": " -", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-45d0aabc5023", + "choices": [ + { + "delta": { + "content": "100", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-45d0aabc5023", + "choices": [ + { + "delta": { + "content": "\u00b0C", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-45d0aabc5023", + "choices": [ + { + "delta": { + "content": ".", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-45d0aabc5023", + "choices": [ + { + "delta": { + "content": "", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": "stop", + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + } + ], + "is_streaming": true + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/agents/recordings/4da32cdf48ae4c3b381e3557edb99afa0ff16a609aaa941737e99606961a6a07.json b/tests/integration/agents/recordings/4da32cdf48ae4c3b381e3557edb99afa0ff16a609aaa941737e99606961a6a07.json index 89fa490c3..755276918 100644 --- a/tests/integration/agents/recordings/4da32cdf48ae4c3b381e3557edb99afa0ff16a609aaa941737e99606961a6a07.json +++ b/tests/integration/agents/recordings/4da32cdf48ae4c3b381e3557edb99afa0ff16a609aaa941737e99606961a6a07.json @@ -45,7 +45,33 @@ "choices": [ { "delta": { - "content": "The", + "content": "Italy", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-4da32cdf48ae", + "choices": [ + { + "delta": { + "content": "'s", "function_call": null, "refusal": null, "role": "assistant", @@ -90,58 +116,6 @@ "usage": null } }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-4da32cdf48ae", - "choices": [ - { - "delta": { - "content": " of", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-4da32cdf48ae", - "choices": [ - { - "delta": { - "content": " Italy", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, { "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": { @@ -194,6 +168,1124 @@ "usage": null } }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-4da32cdf48ae", + "choices": [ + { + "delta": { + "content": " and", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-4da32cdf48ae", + "choices": [ + { + "delta": { + "content": " also", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-4da32cdf48ae", + "choices": [ + { + "delta": { + "content": " the", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-4da32cdf48ae", + "choices": [ + { + "delta": { + "content": " seat", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-4da32cdf48ae", + "choices": [ + { + "delta": { + "content": " for", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-4da32cdf48ae", + "choices": [ + { + "delta": { + "content": " the", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-4da32cdf48ae", + "choices": [ + { + "delta": { + "content": " EU", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-4da32cdf48ae", + "choices": [ + { + "delta": { + "content": ",", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-4da32cdf48ae", + "choices": [ + { + "delta": { + "content": " as", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-4da32cdf48ae", + "choices": [ + { + "delta": { + "content": " well", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-4da32cdf48ae", + "choices": [ + { + "delta": { + "content": " it", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-4da32cdf48ae", + "choices": [ + { + "delta": { + "content": " has", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-4da32cdf48ae", + "choices": [ + { + "delta": { + "content": " been", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-4da32cdf48ae", + "choices": [ + { + "delta": { + "content": " for", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-4da32cdf48ae", + "choices": [ + { + "delta": { + "content": " the", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-4da32cdf48ae", + "choices": [ + { + "delta": { + "content": " centuries", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-4da32cdf48ae", + "choices": [ + { + "delta": { + "content": " a", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-4da32cdf48ae", + "choices": [ + { + "delta": { + "content": " significant", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-4da32cdf48ae", + "choices": [ + { + "delta": { + "content": " role", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-4da32cdf48ae", + "choices": [ + { + "delta": { + "content": " in", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-4da32cdf48ae", + "choices": [ + { + "delta": { + "content": " international", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-4da32cdf48ae", + "choices": [ + { + "delta": { + "content": " politics", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-4da32cdf48ae", + "choices": [ + { + "delta": { + "content": " with", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-4da32cdf48ae", + "choices": [ + { + "delta": { + "content": " that", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-4da32cdf48ae", + "choices": [ + { + "delta": { + "content": " being", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-4da32cdf48ae", + "choices": [ + { + "delta": { + "content": " also", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-4da32cdf48ae", + "choices": [ + { + "delta": { + "content": " an", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-4da32cdf48ae", + "choices": [ + { + "delta": { + "content": " important", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-4da32cdf48ae", + "choices": [ + { + "delta": { + "content": " location", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-4da32cdf48ae", + "choices": [ + { + "delta": { + "content": " for", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-4da32cdf48ae", + "choices": [ + { + "delta": { + "content": " various", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-4da32cdf48ae", + "choices": [ + { + "delta": { + "content": " historical", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-4da32cdf48ae", + "choices": [ + { + "delta": { + "content": " events", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-4da32cdf48ae", + "choices": [ + { + "delta": { + "content": " such", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-4da32cdf48ae", + "choices": [ + { + "delta": { + "content": " like", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-4da32cdf48ae", + "choices": [ + { + "delta": { + "content": " the", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-4da32cdf48ae", + "choices": [ + { + "delta": { + "content": " signing", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-4da32cdf48ae", + "choices": [ + { + "delta": { + "content": " of", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-4da32cdf48ae", + "choices": [ + { + "delta": { + "content": " treaty", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-4da32cdf48ae", + "choices": [ + { + "delta": { + "content": " of", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-4da32cdf48ae", + "choices": [ + { + "delta": { + "content": " West", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-4da32cdf48ae", + "choices": [ + { + "delta": { + "content": "ph", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-4da32cdf48ae", + "choices": [ + { + "delta": { + "content": "alia", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, { "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": { @@ -257,9 +1349,9 @@ "service_tier": null, "system_fingerprint": "fp_ollama", "usage": { - "completion_tokens": 8, + "completion_tokens": 50, "prompt_tokens": 82, - "total_tokens": 90, + "total_tokens": 132, "completion_tokens_details": null, "prompt_tokens_details": null } diff --git a/tests/integration/agents/recordings/585a2cf2c22b0db155a6a94052836b34c42f68bf04b1b2cb74ddf00943c0442d.json b/tests/integration/agents/recordings/585a2cf2c22b0db155a6a94052836b34c42f68bf04b1b2cb74ddf00943c0442d.json index cac9a6db2..988b270d7 100644 --- a/tests/integration/agents/recordings/585a2cf2c22b0db155a6a94052836b34c42f68bf04b1b2cb74ddf00943c0442d.json +++ b/tests/integration/agents/recordings/585a2cf2c22b0db155a6a94052836b34c42f68bf04b1b2cb74ddf00943c0442d.json @@ -48,7 +48,7 @@ "tool_calls": [ { "index": 0, - "id": "call_lqrdy0rt", + "id": "call_x427af31", "function": { "arguments": "{}", "name": "get_current_time" @@ -107,9 +107,9 @@ "service_tier": null, "system_fingerprint": "fp_ollama", "usage": { - "completion_tokens": 14, + "completion_tokens": 12, "prompt_tokens": 161, - "total_tokens": 175, + "total_tokens": 173, "completion_tokens_details": null, "prompt_tokens_details": null } diff --git a/tests/integration/agents/recordings/5edf2f0b7a9c875e80e4719f71a1daa94c1287acf164cd81ddd51843d05be718.json b/tests/integration/agents/recordings/5edf2f0b7a9c875e80e4719f71a1daa94c1287acf164cd81ddd51843d05be718.json index 49ca098d5..009646e27 100644 --- a/tests/integration/agents/recordings/5edf2f0b7a9c875e80e4719f71a1daa94c1287acf164cd81ddd51843d05be718.json +++ b/tests/integration/agents/recordings/5edf2f0b7a9c875e80e4719f71a1daa94c1287acf164cd81ddd51843d05be718.json @@ -56,7 +56,7 @@ "tool_calls": [ { "index": 0, - "id": "call_4ibtjudr", + "id": "call_wkjhgmpf", "function": { "arguments": "{\"city\":\"Tokyo\"}", "name": "get_weather" diff --git a/tests/integration/agents/recordings/697a25dd7f0ff515f567c883ad72ae9dca423726834aec8b38420dccb735c050.json b/tests/integration/agents/recordings/697a25dd7f0ff515f567c883ad72ae9dca423726834aec8b38420dccb735c050.json index 298e0e3b8..8b8f04ae6 100644 --- a/tests/integration/agents/recordings/697a25dd7f0ff515f567c883ad72ae9dca423726834aec8b38420dccb735c050.json +++ b/tests/integration/agents/recordings/697a25dd7f0ff515f567c883ad72ae9dca423726834aec8b38420dccb735c050.json @@ -66,7 +66,7 @@ "tool_calls": [ { "index": 0, - "id": "call_pojpzwm8", + "id": "call_klhbln13", "function": { "arguments": "{\"celcius\":false,\"liquid_name\":\"polyjuice\"}", "name": "get_boiling_point_with_metadata" @@ -116,5 +116,6 @@ } ], "is_streaming": true - } + }, + "id_normalization_mapping": {} } diff --git a/tests/integration/agents/recordings/6b207540bc67e2b9e6beb091d477d446d91e9837add7e9f4de236627195d41e4.json b/tests/integration/agents/recordings/6b207540bc67e2b9e6beb091d477d446d91e9837add7e9f4de236627195d41e4.json index fc263d5e9..d5d249587 100644 --- a/tests/integration/agents/recordings/6b207540bc67e2b9e6beb091d477d446d91e9837add7e9f4de236627195d41e4.json +++ b/tests/integration/agents/recordings/6b207540bc67e2b9e6beb091d477d446d91e9837add7e9f4de236627195d41e4.json @@ -81,33 +81,7 @@ "choices": [ { "delta": { - "content": " not", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " able", + "content": " happy", "function_call": null, "refusal": null, "role": "assistant", @@ -159,267 +133,7 @@ "choices": [ { "delta": { - "content": " provide", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " real", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": "-time", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " weather", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " information", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": ".", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " However", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": ",", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " I", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " can", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " give", + "content": " help", "function_call": null, "refusal": null, "role": "assistant", @@ -471,7 +185,7 @@ "choices": [ { "delta": { - "content": " an", + "content": " with", "function_call": null, "refusal": null, "role": "assistant", @@ -497,7 +211,7 @@ "choices": [ { "delta": { - "content": " idea", + "content": " your", "function_call": null, "refusal": null, "role": "assistant", @@ -523,553 +237,7 @@ "choices": [ { "delta": { - "content": " of", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " what", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " Tokyo", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": "'s", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " typical", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " weather", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " is", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " like", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " during", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " different", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " seasons", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": ".\n\n", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": "**", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": "Spring", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " (", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": "March", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " to", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " May", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": ")**", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": ":", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " Mild", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " temperatures", + "content": " question", "function_call": null, "refusal": null, "role": "assistant", @@ -1121,7 +289,7 @@ "choices": [ { "delta": { - "content": " usually", + "content": " but", "function_call": null, "refusal": null, "role": "assistant", @@ -1147,7 +315,85 @@ "choices": [ { "delta": { - "content": " ranging", + "content": " I", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " need", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " more", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " information", "function_call": null, "refusal": null, "role": "assistant", @@ -1199,7 +445,7 @@ "choices": [ { "delta": { - "content": " ", + "content": " you", "function_call": null, "refusal": null, "role": "assistant", @@ -1225,7 +471,7 @@ "choices": [ { "delta": { - "content": "10", + "content": ".", "function_call": null, "refusal": null, "role": "assistant", @@ -1251,3725 +497,7 @@ "choices": [ { "delta": { - "content": "\u00b0C", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " to", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " ", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": "20", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": "\u00b0C", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " (", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": "50", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": "\u00b0F", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " to", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " ", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": "68", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": "\u00b0F", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": ").", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " It", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": "'s", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " a", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " great", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " time", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " to", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " visit", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " Tokyo", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " for", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " cherry", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " blossom", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " season", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": ".\n\n", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": "**", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": "Summer", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " (", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": "June", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " to", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " August", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": ")**", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": ":", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " Hot", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " and", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " humid", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": ",", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " with", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " temperatures", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " often", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " exceeding", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " ", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": "30", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": "\u00b0C", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " (", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": "86", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": "\u00b0F", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": ").", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " Summer", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " is", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " rainy", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": ",", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " with", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " heavy", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " down", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": "p", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": "ours", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " during", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " the", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " after", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": "no", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": "ons", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": ".\n\n", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": "**", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": "Aut", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": "umn", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " (", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": "September", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " to", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " November", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": ")**", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": ":", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " Comfort", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": "able", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " temperatures", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": ",", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " ranging", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " from", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " ", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": "10", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": "\u00b0C", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " to", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " ", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": "20", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": "\u00b0C", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " (", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": "50", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": "\u00b0F", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " to", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " ", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": "68", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": "\u00b0F", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": ").", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " Autumn", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " foliage", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " is", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " a", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " highlight", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " of", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " Tokyo", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": "'s", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " scenery", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": ".\n\n", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": "**", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": "Winter", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " (", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": "December", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " to", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " February", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": ")**", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": ":", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " Cold", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " and", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " snowy", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": ",", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " with", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " temperatures", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " sometimes", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " dropping", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " below", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " ", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": "0", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": "\u00b0C", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " (", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": "32", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": "\u00b0F", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": ").", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " Snow", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": "fall", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " can", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " be", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " significant", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " in", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " some", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " parts", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " of", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " the", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " city", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": ".\n\n", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": "Please", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " note", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " that", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " these", + "content": " There", "function_call": null, "refusal": null, "role": "assistant", @@ -5021,7 +549,7 @@ "choices": [ { "delta": { - "content": " general", + "content": " many", "function_call": null, "refusal": null, "role": "assistant", @@ -5047,7 +575,7 @@ "choices": [ { "delta": { - "content": " temperature", + "content": " cities", "function_call": null, "refusal": null, "role": "assistant", @@ -5073,7 +601,33 @@ "choices": [ { "delta": { - "content": " ranges", + "content": " named", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " Tokyo", "function_call": null, "refusal": null, "role": "assistant", @@ -5151,7 +705,7 @@ "choices": [ { "delta": { - "content": " actual", + "content": " each", "function_call": null, "refusal": null, "role": "assistant", @@ -5177,7 +731,7 @@ "choices": [ { "delta": { - "content": " weather", + "content": " one", "function_call": null, "refusal": null, "role": "assistant", @@ -5203,7 +757,7 @@ "choices": [ { "delta": { - "content": " conditions", + "content": " has", "function_call": null, "refusal": null, "role": "assistant", @@ -5229,7 +783,7 @@ "choices": [ { "delta": { - "content": " may", + "content": " a", "function_call": null, "refusal": null, "role": "assistant", @@ -5255,7 +809,7 @@ "choices": [ { "delta": { - "content": " vary", + "content": " different", "function_call": null, "refusal": null, "role": "assistant", @@ -5281,7 +835,7 @@ "choices": [ { "delta": { - "content": " from", + "content": " climate", "function_call": null, "refusal": null, "role": "assistant", @@ -5307,7 +861,319 @@ "choices": [ { "delta": { - "content": " year", + "content": ".\n\n", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": "Could", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " you", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " please", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " tell", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " me", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " which", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " city", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " in", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " Japan", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " you", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " are", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " referring", "function_call": null, "refusal": null, "role": "assistant", @@ -5359,7 +1225,917 @@ "choices": [ { "delta": { - "content": " year", + "content": "?", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " Tokyo", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " itself", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " is", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " not", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " always", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " a", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " good", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " choice", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " as", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " it", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " often", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " gets", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " confused", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " with", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " the", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " actual", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " name", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " for", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " the", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " large", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " and", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " populous", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " area", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " of", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " the", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " K", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": "anto", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " region", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " which", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " includes", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " the", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " larger", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " areas", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " surrounding", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " Tokyo", "function_call": null, "refusal": null, "role": "assistant", @@ -5404,6 +2180,708 @@ "usage": null } }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " \n\n", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": "If", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " that", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " does", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " not", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " give", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " us", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " enough", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " grounds", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " then", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " could", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " you", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " provide", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " also", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " what", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " approximate", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " month", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " you", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " want", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " the", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " information", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " about", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " (", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": "for", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " seasonal", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " changes", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": ")?", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, { "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": { @@ -5441,9 +2919,9 @@ "service_tier": null, "system_fingerprint": "fp_ollama", "usage": { - "completion_tokens": 208, + "completion_tokens": 111, "prompt_tokens": 32, - "total_tokens": 240, + "total_tokens": 143, "completion_tokens_details": null, "prompt_tokens_details": null } diff --git a/tests/integration/agents/recordings/6da760645fe224ace4ab628e4f647259897598e28037fe5f7c09f6677edd08e9.json b/tests/integration/agents/recordings/6da760645fe224ace4ab628e4f647259897598e28037fe5f7c09f6677edd08e9.json index 41c4f97ae..a178476e1 100644 --- a/tests/integration/agents/recordings/6da760645fe224ace4ab628e4f647259897598e28037fe5f7c09f6677edd08e9.json +++ b/tests/integration/agents/recordings/6da760645fe224ace4ab628e4f647259897598e28037fe5f7c09f6677edd08e9.json @@ -54,5 +54,6 @@ } }, "is_streaming": false - } + }, + "id_normalization_mapping": {} } diff --git a/tests/integration/agents/recordings/7094319e038424fbec54338c397b487c7128fc28534351deb4662fba31043fa4.json b/tests/integration/agents/recordings/7094319e038424fbec54338c397b487c7128fc28534351deb4662fba31043fa4.json index dce0c2e4d..7f7bf13ca 100644 --- a/tests/integration/agents/recordings/7094319e038424fbec54338c397b487c7128fc28534351deb4662fba31043fa4.json +++ b/tests/integration/agents/recordings/7094319e038424fbec54338c397b487c7128fc28534351deb4662fba31043fa4.json @@ -54,5 +54,6 @@ } }, "is_streaming": false - } + }, + "id_normalization_mapping": {} } diff --git a/tests/integration/agents/recordings/74c26f63592ceedb76eb9623fce41773965dd66b569506b5622b1a797c45f8e4.json b/tests/integration/agents/recordings/74c26f63592ceedb76eb9623fce41773965dd66b569506b5622b1a797c45f8e4.json index d8b125dad..a1464e8c3 100644 --- a/tests/integration/agents/recordings/74c26f63592ceedb76eb9623fce41773965dd66b569506b5622b1a797c45f8e4.json +++ b/tests/integration/agents/recordings/74c26f63592ceedb76eb9623fce41773965dd66b569506b5622b1a797c45f8e4.json @@ -54,5 +54,6 @@ } }, "is_streaming": false - } + }, + "id_normalization_mapping": {} } diff --git a/tests/integration/agents/recordings/7e0d8c4abe407611ceaa99beea2e9973e2a938cab3db3e1be017bbe8be10edc6.json b/tests/integration/agents/recordings/7e0d8c4abe407611ceaa99beea2e9973e2a938cab3db3e1be017bbe8be10edc6.json index e11d38095..665e53245 100644 --- a/tests/integration/agents/recordings/7e0d8c4abe407611ceaa99beea2e9973e2a938cab3db3e1be017bbe8be10edc6.json +++ b/tests/integration/agents/recordings/7e0d8c4abe407611ceaa99beea2e9973e2a938cab3db3e1be017bbe8be10edc6.json @@ -66,7 +66,7 @@ "tool_calls": [ { "index": 0, - "id": "call_rwasjr3y", + "id": "call_zqu5i0ti", "function": { "arguments": "{\"celcius\":null,\"liquid_name\":\"polyjuice\"}", "name": "get_boiling_point" @@ -116,5 +116,6 @@ } ], "is_streaming": true - } + }, + "id_normalization_mapping": {} } diff --git a/tests/integration/agents/recordings/7e794c73bf79604a10482bba03124849cb763c7bb66acf3937b524a539b80366.json b/tests/integration/agents/recordings/7e794c73bf79604a10482bba03124849cb763c7bb66acf3937b524a539b80366.json new file mode 100644 index 000000000..dfae71291 --- /dev/null +++ b/tests/integration/agents/recordings/7e794c73bf79604a10482bba03124849cb763c7bb66acf3937b524a539b80366.json @@ -0,0 +1,59 @@ +{ + "test_id": "tests/integration/agents/test_agents.py::test_tool_choice_required[ollama/llama3.2:3b-instruct-fp16]", + "request": { + "method": "POST", + "url": "http://0.0.0.0:11434/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "llama-guard3:1b", + "messages": [ + { + "role": "user", + "content": "Task: Check if there is unsafe content in 'Assistant' messages in conversations according our safety policy with the below categories.\n\n\nS1: Violent Crimes.\nS2: Non-Violent Crimes.\nS3: Sex Crimes.\nS4: Child Exploitation.\nS5: Defamation.\nS6: Specialized Advice.\nS7: Privacy.\nS8: Intellectual Property.\nS9: Indiscriminate Weapons.\nS10: Hate.\nS11: Self-Harm.\nS12: Sexual Content.\nS13: Elections.\nS14: Code Interpreter Abuse.\n\n\n\n\nUser: What is the boiling point of the liquid polyjuice in celsius?\n\nAssistant: \n\nTool: -100\n\nAssistant: The boiling point of liquid polyjuice is -100\u00b0C.\n\n\n\nProvide your safety assessment for ONLY THE LAST Assistant message in the above conversation:\n - First line must read 'safe' or 'unsafe'.\n - If unsafe, a second line must include a comma-separated list of violated categories." + } + ], + "stream": false, + "temperature": 0.0 + }, + "endpoint": "/v1/chat/completions", + "model": "llama-guard3:1b" + }, + "response": { + "body": { + "__type__": "openai.types.chat.chat_completion.ChatCompletion", + "__data__": { + "id": "rec-7e794c73bf79", + "choices": [ + { + "finish_reason": "stop", + "index": 0, + "logprobs": null, + "message": { + "content": "safe", + "refusal": null, + "role": "assistant", + "annotations": null, + "audio": null, + "function_call": null, + "tool_calls": null + } + } + ], + "created": 0, + "model": "llama-guard3:1b", + "object": "chat.completion", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": { + "completion_tokens": 2, + "prompt_tokens": 421, + "total_tokens": 423, + "completion_tokens_details": null, + "prompt_tokens_details": null + } + } + }, + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/agents/recordings/868820c8d798c0d16063d1750a65ae2632ecf543ee440e7d87ea16f8e83461a5.json b/tests/integration/agents/recordings/868820c8d798c0d16063d1750a65ae2632ecf543ee440e7d87ea16f8e83461a5.json index c82ea6394..fa03baf5e 100644 --- a/tests/integration/agents/recordings/868820c8d798c0d16063d1750a65ae2632ecf543ee440e7d87ea16f8e83461a5.json +++ b/tests/integration/agents/recordings/868820c8d798c0d16063d1750a65ae2632ecf543ee440e7d87ea16f8e83461a5.json @@ -54,5 +54,6 @@ } }, "is_streaming": false - } + }, + "id_normalization_mapping": {} } diff --git a/tests/integration/agents/recordings/86e2b939aabb9dfe7ec712a6b20a5809d6fb56f8c9f92d93030f57cba51a1fe2.json b/tests/integration/agents/recordings/86e2b939aabb9dfe7ec712a6b20a5809d6fb56f8c9f92d93030f57cba51a1fe2.json index c33ecca7e..c702a53aa 100644 --- a/tests/integration/agents/recordings/86e2b939aabb9dfe7ec712a6b20a5809d6fb56f8c9f92d93030f57cba51a1fe2.json +++ b/tests/integration/agents/recordings/86e2b939aabb9dfe7ec712a6b20a5809d6fb56f8c9f92d93030f57cba51a1fe2.json @@ -54,5 +54,6 @@ } }, "is_streaming": false - } + }, + "id_normalization_mapping": {} } diff --git a/tests/integration/agents/recordings/8c4ec47152697a5b34e44d75af581efbada34e06f59ddf536149871c64c9a247.json b/tests/integration/agents/recordings/8c4ec47152697a5b34e44d75af581efbada34e06f59ddf536149871c64c9a247.json new file mode 100644 index 000000000..9d391c7c8 --- /dev/null +++ b/tests/integration/agents/recordings/8c4ec47152697a5b34e44d75af581efbada34e06f59ddf536149871c64c9a247.json @@ -0,0 +1,442 @@ +{ + "test_id": "tests/integration/agents/test_agents.py::test_create_turn_response[ollama/llama3.2:3b-instruct-fp16-client_tools0]", + "request": { + "method": "POST", + "url": "http://0.0.0.0:11434/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "llama3.2:3b-instruct-fp16", + "messages": [ + { + "role": "system", + "content": "You are a helpful assistant" + }, + { + "role": "user", + "content": "Call get_boiling_point tool and answer What is the boiling point of polyjuice?" + }, + { + "role": "assistant", + "content": "", + "tool_calls": [ + { + "id": "call_zqu5i0ti", + "type": "function", + "function": { + "name": "get_boiling_point", + "arguments": "{\"celcius\":null,\"liquid_name\":\"polyjuice\"}" + } + } + ] + }, + { + "role": "tool", + "tool_call_id": "call_zqu5i0ti", + "content": "-212" + } + ], + "max_tokens": 512, + "stream": true, + "temperature": 0.0001, + "tool_choice": "auto", + "tools": [ + { + "type": "function", + "function": { + "name": "get_boiling_point", + "description": "Returns the boiling point of a liquid in Celcius or Fahrenheit.", + "parameters": { + "type": "object", + "properties": { + "liquid_name": { + "type": "string", + "description": "The name of the liquid" + }, + "celcius": { + "type": "boolean", + "description": "Whether to return the boiling point in Celcius" + } + }, + "required": [ + "liquid_name" + ] + } + } + } + ], + "top_p": 0.9 + }, + "endpoint": "/v1/chat/completions", + "model": "llama3.2:3b-instruct-fp16" + }, + "response": { + "body": [ + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-8c4ec4715269", + "choices": [ + { + "delta": { + "content": "The", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-8c4ec4715269", + "choices": [ + { + "delta": { + "content": " boiling", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-8c4ec4715269", + "choices": [ + { + "delta": { + "content": " point", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-8c4ec4715269", + "choices": [ + { + "delta": { + "content": " of", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-8c4ec4715269", + "choices": [ + { + "delta": { + "content": " poly", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-8c4ec4715269", + "choices": [ + { + "delta": { + "content": "ju", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-8c4ec4715269", + "choices": [ + { + "delta": { + "content": "ice", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-8c4ec4715269", + "choices": [ + { + "delta": { + "content": " is", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-8c4ec4715269", + "choices": [ + { + "delta": { + "content": " -", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-8c4ec4715269", + "choices": [ + { + "delta": { + "content": "212", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-8c4ec4715269", + "choices": [ + { + "delta": { + "content": " degrees", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-8c4ec4715269", + "choices": [ + { + "delta": { + "content": " Celsius", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-8c4ec4715269", + "choices": [ + { + "delta": { + "content": ".", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-8c4ec4715269", + "choices": [ + { + "delta": { + "content": "", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": "stop", + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + } + ], + "is_streaming": true + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/agents/recordings/8ed0947593196c2b2f68035e248c137813e8db50d0d46395ef9ba98636fa5819.json b/tests/integration/agents/recordings/8ed0947593196c2b2f68035e248c137813e8db50d0d46395ef9ba98636fa5819.json index b209de507..15f9b0f96 100644 --- a/tests/integration/agents/recordings/8ed0947593196c2b2f68035e248c137813e8db50d0d46395ef9ba98636fa5819.json +++ b/tests/integration/agents/recordings/8ed0947593196c2b2f68035e248c137813e8db50d0d46395ef9ba98636fa5819.json @@ -54,5 +54,6 @@ } }, "is_streaming": false - } + }, + "id_normalization_mapping": {} } diff --git a/tests/integration/agents/recordings/8fc418c02b8b6fe09238e36fb72c5e7fc9d41fdaa3eb357f518e0fcaec5ea1e2.json b/tests/integration/agents/recordings/8fc418c02b8b6fe09238e36fb72c5e7fc9d41fdaa3eb357f518e0fcaec5ea1e2.json new file mode 100644 index 000000000..70d92b2bf --- /dev/null +++ b/tests/integration/agents/recordings/8fc418c02b8b6fe09238e36fb72c5e7fc9d41fdaa3eb357f518e0fcaec5ea1e2.json @@ -0,0 +1,59 @@ +{ + "test_id": "tests/integration/agents/test_agents.py::test_tool_choice_get_boiling_point[ollama/llama3.2:3b-instruct-fp16]", + "request": { + "method": "POST", + "url": "http://0.0.0.0:11434/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "llama-guard3:1b", + "messages": [ + { + "role": "user", + "content": "Task: Check if there is unsafe content in 'Assistant' messages in conversations according our safety policy with the below categories.\n\n\nS1: Violent Crimes.\nS2: Non-Violent Crimes.\nS3: Sex Crimes.\nS4: Child Exploitation.\nS5: Defamation.\nS6: Specialized Advice.\nS7: Privacy.\nS8: Intellectual Property.\nS9: Indiscriminate Weapons.\nS10: Hate.\nS11: Self-Harm.\nS12: Sexual Content.\nS13: Elections.\nS14: Code Interpreter Abuse.\n\n\n\n\nUser: What is the boiling point of the liquid polyjuice in celsius?\n\nAssistant: \n\nTool: -100\n\nAssistant: The boiling point of liquid polyjuice is -100\u00b0C.\n\n\n\nProvide your safety assessment for ONLY THE LAST Assistant message in the above conversation:\n - First line must read 'safe' or 'unsafe'.\n - If unsafe, a second line must include a comma-separated list of violated categories." + } + ], + "stream": false, + "temperature": 0.0 + }, + "endpoint": "/v1/chat/completions", + "model": "llama-guard3:1b" + }, + "response": { + "body": { + "__type__": "openai.types.chat.chat_completion.ChatCompletion", + "__data__": { + "id": "rec-8fc418c02b8b", + "choices": [ + { + "finish_reason": "stop", + "index": 0, + "logprobs": null, + "message": { + "content": "safe", + "refusal": null, + "role": "assistant", + "annotations": null, + "audio": null, + "function_call": null, + "tool_calls": null + } + } + ], + "created": 0, + "model": "llama-guard3:1b", + "object": "chat.completion", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": { + "completion_tokens": 2, + "prompt_tokens": 421, + "total_tokens": 423, + "completion_tokens_details": null, + "prompt_tokens_details": null + } + } + }, + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/agents/recordings/901956b3a51b792f2506d603489af51636b480db9cc520614ee4886418776237.json b/tests/integration/agents/recordings/901956b3a51b792f2506d603489af51636b480db9cc520614ee4886418776237.json index 07b7f8331..16078a8c2 100644 --- a/tests/integration/agents/recordings/901956b3a51b792f2506d603489af51636b480db9cc520614ee4886418776237.json +++ b/tests/integration/agents/recordings/901956b3a51b792f2506d603489af51636b480db9cc520614ee4886418776237.json @@ -54,5 +54,6 @@ } }, "is_streaming": false - } + }, + "id_normalization_mapping": {} } diff --git a/tests/integration/agents/recordings/958f9b74e98bcf41e4988db8ad15494b8fe0ff707261108305353e4ad980195f.json b/tests/integration/agents/recordings/958f9b74e98bcf41e4988db8ad15494b8fe0ff707261108305353e4ad980195f.json index aeb1fe320..ec3117ee3 100644 --- a/tests/integration/agents/recordings/958f9b74e98bcf41e4988db8ad15494b8fe0ff707261108305353e4ad980195f.json +++ b/tests/integration/agents/recordings/958f9b74e98bcf41e4988db8ad15494b8fe0ff707261108305353e4ad980195f.json @@ -1510,5 +1510,6 @@ } ], "is_streaming": true - } + }, + "id_normalization_mapping": {} } diff --git a/tests/integration/agents/recordings/96623a251d6e51ee6ba21c53ca111d4aa54882a124d783a8096fd88adf481065.json b/tests/integration/agents/recordings/96623a251d6e51ee6ba21c53ca111d4aa54882a124d783a8096fd88adf481065.json index 93155e18c..4d8a2a9ce 100644 --- a/tests/integration/agents/recordings/96623a251d6e51ee6ba21c53ca111d4aa54882a124d783a8096fd88adf481065.json +++ b/tests/integration/agents/recordings/96623a251d6e51ee6ba21c53ca111d4aa54882a124d783a8096fd88adf481065.json @@ -54,5 +54,6 @@ } }, "is_streaming": false - } + }, + "id_normalization_mapping": {} } diff --git a/tests/integration/agents/recordings/a702e4bf918e94acd0d76ed753c120a4704dde82acf5ae198982fd62bd103279.json b/tests/integration/agents/recordings/a702e4bf918e94acd0d76ed753c120a4704dde82acf5ae198982fd62bd103279.json index 1903e3d19..5200b2e65 100644 --- a/tests/integration/agents/recordings/a702e4bf918e94acd0d76ed753c120a4704dde82acf5ae198982fd62bd103279.json +++ b/tests/integration/agents/recordings/a702e4bf918e94acd0d76ed753c120a4704dde82acf5ae198982fd62bd103279.json @@ -54,5 +54,6 @@ } }, "is_streaming": false - } + }, + "id_normalization_mapping": {} } diff --git a/tests/integration/agents/recordings/ad3f6a2b4031bcd38026c3c50617851f102c12946164a563584e6316bd1b6228.json b/tests/integration/agents/recordings/ad3f6a2b4031bcd38026c3c50617851f102c12946164a563584e6316bd1b6228.json index d6ec4ea4b..52d599fe0 100644 --- a/tests/integration/agents/recordings/ad3f6a2b4031bcd38026c3c50617851f102c12946164a563584e6316bd1b6228.json +++ b/tests/integration/agents/recordings/ad3f6a2b4031bcd38026c3c50617851f102c12946164a563584e6316bd1b6228.json @@ -54,5 +54,6 @@ } }, "is_streaming": false - } + }, + "id_normalization_mapping": {} } diff --git a/tests/integration/agents/recordings/b3c24a0ab429fb3d7e3680a2a689a8eddb2c2aaf826b513fec55dcd70cdf35ea.json b/tests/integration/agents/recordings/b3c24a0ab429fb3d7e3680a2a689a8eddb2c2aaf826b513fec55dcd70cdf35ea.json new file mode 100644 index 000000000..15a721ef9 --- /dev/null +++ b/tests/integration/agents/recordings/b3c24a0ab429fb3d7e3680a2a689a8eddb2c2aaf826b513fec55dcd70cdf35ea.json @@ -0,0 +1,260 @@ +{ + "test_id": "tests/integration/agents/test_openai_responses.py::test_response_with_instructions[txt=ollama/llama3.2:3b-instruct-fp16]", + "request": { + "method": "POST", + "url": "http://0.0.0.0:11434/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "llama3.2:3b-instruct-fp16", + "messages": [ + { + "role": "system", + "content": "You are a helpful assistant." + }, + { + "role": "user", + "content": "What is the capital of France?" + } + ], + "stream": true, + "stream_options": { + "include_usage": true + } + }, + "endpoint": "/v1/chat/completions", + "model": "llama3.2:3b-instruct-fp16" + }, + "response": { + "body": [ + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b3c24a0ab429", + "choices": [ + { + "delta": { + "content": "The", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b3c24a0ab429", + "choices": [ + { + "delta": { + "content": " capital", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b3c24a0ab429", + "choices": [ + { + "delta": { + "content": " of", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b3c24a0ab429", + "choices": [ + { + "delta": { + "content": " France", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b3c24a0ab429", + "choices": [ + { + "delta": { + "content": " is", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b3c24a0ab429", + "choices": [ + { + "delta": { + "content": " Paris", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b3c24a0ab429", + "choices": [ + { + "delta": { + "content": ".", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b3c24a0ab429", + "choices": [ + { + "delta": { + "content": "", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": "stop", + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b3c24a0ab429", + "choices": [], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": { + "completion_tokens": 8, + "prompt_tokens": 38, + "total_tokens": 46, + "completion_tokens_details": null, + "prompt_tokens_details": null + } + } + } + ], + "is_streaming": true + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/agents/recordings/b4a47451a2af579b9dfb4a60bacaee0f274fc53b263c25fe5e9e4bc23739f3db.json b/tests/integration/agents/recordings/b4a47451a2af579b9dfb4a60bacaee0f274fc53b263c25fe5e9e4bc23739f3db.json new file mode 100644 index 000000000..50e14c9fc --- /dev/null +++ b/tests/integration/agents/recordings/b4a47451a2af579b9dfb4a60bacaee0f274fc53b263c25fe5e9e4bc23739f3db.json @@ -0,0 +1,442 @@ +{ + "test_id": "tests/integration/agents/test_agents.py::test_tool_choice_required[ollama/llama3.2:3b-instruct-fp16]", + "request": { + "method": "POST", + "url": "http://0.0.0.0:11434/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "llama3.2:3b-instruct-fp16", + "messages": [ + { + "role": "system", + "content": "You are a helpful assistant" + }, + { + "role": "user", + "content": "What is the boiling point of the liquid polyjuice in celsius?" + }, + { + "role": "assistant", + "content": "", + "tool_calls": [ + { + "id": "call_z1rt0qb1", + "type": "function", + "function": { + "name": "get_boiling_point", + "arguments": "{\"celcius\":true,\"liquid_name\":\"polyjuice\"}" + } + } + ] + }, + { + "role": "tool", + "tool_call_id": "call_z1rt0qb1", + "content": "-100" + } + ], + "max_tokens": 512, + "stream": true, + "temperature": 0.0001, + "tool_choice": "required", + "tools": [ + { + "type": "function", + "function": { + "name": "get_boiling_point", + "description": "Returns the boiling point of a liquid in Celcius or Fahrenheit.", + "parameters": { + "type": "object", + "properties": { + "liquid_name": { + "type": "string", + "description": "The name of the liquid" + }, + "celcius": { + "type": "boolean", + "description": "Whether to return the boiling point in Celcius" + } + }, + "required": [ + "liquid_name" + ] + } + } + } + ], + "top_p": 0.9 + }, + "endpoint": "/v1/chat/completions", + "model": "llama3.2:3b-instruct-fp16" + }, + "response": { + "body": [ + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b4a47451a2af", + "choices": [ + { + "delta": { + "content": "The", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b4a47451a2af", + "choices": [ + { + "delta": { + "content": " boiling", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b4a47451a2af", + "choices": [ + { + "delta": { + "content": " point", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b4a47451a2af", + "choices": [ + { + "delta": { + "content": " of", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b4a47451a2af", + "choices": [ + { + "delta": { + "content": " liquid", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b4a47451a2af", + "choices": [ + { + "delta": { + "content": " poly", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b4a47451a2af", + "choices": [ + { + "delta": { + "content": "ju", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b4a47451a2af", + "choices": [ + { + "delta": { + "content": "ice", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b4a47451a2af", + "choices": [ + { + "delta": { + "content": " is", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b4a47451a2af", + "choices": [ + { + "delta": { + "content": " -", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b4a47451a2af", + "choices": [ + { + "delta": { + "content": "100", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b4a47451a2af", + "choices": [ + { + "delta": { + "content": "\u00b0C", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b4a47451a2af", + "choices": [ + { + "delta": { + "content": ".", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b4a47451a2af", + "choices": [ + { + "delta": { + "content": "", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": "stop", + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + } + ], + "is_streaming": true + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/agents/recordings/bcb50763cac6527944653e77df1f2cc9e9481ffae9cff5693b2f669270c9c0a7.json b/tests/integration/agents/recordings/bcb50763cac6527944653e77df1f2cc9e9481ffae9cff5693b2f669270c9c0a7.json index c7ecef75f..4527ab5cd 100644 --- a/tests/integration/agents/recordings/bcb50763cac6527944653e77df1f2cc9e9481ffae9cff5693b2f669270c9c0a7.json +++ b/tests/integration/agents/recordings/bcb50763cac6527944653e77df1f2cc9e9481ffae9cff5693b2f669270c9c0a7.json @@ -66,7 +66,7 @@ "tool_calls": [ { "index": 0, - "id": "call_qryqpevz", + "id": "call_9x4z21g1", "function": { "arguments": "{\"celcius\":\"true\",\"liquid_name\":\"polyjuice\"}", "name": "get_boiling_point" @@ -116,5 +116,6 @@ } ], "is_streaming": true - } + }, + "id_normalization_mapping": {} } diff --git a/tests/integration/agents/recordings/c97c102959ea8c64a43e4c752bf5e979c7a298fdbeedec153954ce817da7e3e7.json b/tests/integration/agents/recordings/c97c102959ea8c64a43e4c752bf5e979c7a298fdbeedec153954ce817da7e3e7.json index e3f54171f..a1332fddb 100644 --- a/tests/integration/agents/recordings/c97c102959ea8c64a43e4c752bf5e979c7a298fdbeedec153954ce817da7e3e7.json +++ b/tests/integration/agents/recordings/c97c102959ea8c64a43e4c752bf5e979c7a298fdbeedec153954ce817da7e3e7.json @@ -54,5 +54,6 @@ } }, "is_streaming": false - } + }, + "id_normalization_mapping": {} } diff --git a/tests/integration/agents/recordings/ca95f47c2896a7ce1536a3cf1a78170e073e49e478658a4b098b3581a703e843.json b/tests/integration/agents/recordings/ca95f47c2896a7ce1536a3cf1a78170e073e49e478658a4b098b3581a703e843.json index 145596d38..64b28de5c 100644 --- a/tests/integration/agents/recordings/ca95f47c2896a7ce1536a3cf1a78170e073e49e478658a4b098b3581a703e843.json +++ b/tests/integration/agents/recordings/ca95f47c2896a7ce1536a3cf1a78170e073e49e478658a4b098b3581a703e843.json @@ -54,5 +54,6 @@ } }, "is_streaming": false - } + }, + "id_normalization_mapping": {} } diff --git a/tests/integration/agents/recordings/d35fc2ef48595f5d0afe8fb617c8df864a155017e8d4c5d2e2b2c51e9cfaed5e.json b/tests/integration/agents/recordings/d35fc2ef48595f5d0afe8fb617c8df864a155017e8d4c5d2e2b2c51e9cfaed5e.json index a333490a4..ae1be7520 100644 --- a/tests/integration/agents/recordings/d35fc2ef48595f5d0afe8fb617c8df864a155017e8d4c5d2e2b2c51e9cfaed5e.json +++ b/tests/integration/agents/recordings/d35fc2ef48595f5d0afe8fb617c8df864a155017e8d4c5d2e2b2c51e9cfaed5e.json @@ -54,5 +54,6 @@ } }, "is_streaming": false - } + }, + "id_normalization_mapping": {} } diff --git a/tests/integration/agents/recordings/da6fc54bb65dd1f83e577109b8228b644e76999ebb2fe8f09bead3dee56a6046.json b/tests/integration/agents/recordings/da6fc54bb65dd1f83e577109b8228b644e76999ebb2fe8f09bead3dee56a6046.json new file mode 100644 index 000000000..f17ae2ae3 --- /dev/null +++ b/tests/integration/agents/recordings/da6fc54bb65dd1f83e577109b8228b644e76999ebb2fe8f09bead3dee56a6046.json @@ -0,0 +1,59 @@ +{ + "test_id": "tests/integration/agents/test_agents.py::test_custom_tool[ollama/llama3.2:3b-instruct-fp16]", + "request": { + "method": "POST", + "url": "http://0.0.0.0:11434/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "llama-guard3:1b", + "messages": [ + { + "role": "user", + "content": "Task: Check if there is unsafe content in 'Assistant' messages in conversations according our safety policy with the below categories.\n\n\nS1: Violent Crimes.\nS2: Non-Violent Crimes.\nS3: Sex Crimes.\nS4: Child Exploitation.\nS5: Defamation.\nS6: Specialized Advice.\nS7: Privacy.\nS8: Intellectual Property.\nS9: Indiscriminate Weapons.\nS10: Hate.\nS11: Self-Harm.\nS12: Sexual Content.\nS13: Elections.\nS14: Code Interpreter Abuse.\n\n\n\n\nUser: What is the boiling point of the liquid polyjuice in celsius?\n\nAssistant: \n\nTool: -100\n\nAssistant: The boiling point of liquid polyjuice is -100\u00b0C.\n\n\n\nProvide your safety assessment for ONLY THE LAST Assistant message in the above conversation:\n - First line must read 'safe' or 'unsafe'.\n - If unsafe, a second line must include a comma-separated list of violated categories." + } + ], + "stream": false, + "temperature": 0.0 + }, + "endpoint": "/v1/chat/completions", + "model": "llama-guard3:1b" + }, + "response": { + "body": { + "__type__": "openai.types.chat.chat_completion.ChatCompletion", + "__data__": { + "id": "rec-da6fc54bb65d", + "choices": [ + { + "finish_reason": "stop", + "index": 0, + "logprobs": null, + "message": { + "content": "safe", + "refusal": null, + "role": "assistant", + "annotations": null, + "audio": null, + "function_call": null, + "tool_calls": null + } + } + ], + "created": 0, + "model": "llama-guard3:1b", + "object": "chat.completion", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": { + "completion_tokens": 2, + "prompt_tokens": 421, + "total_tokens": 423, + "completion_tokens_details": null, + "prompt_tokens_details": null + } + } + }, + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/agents/recordings/db5c89b87eba0d129ad9ed17306d4016aeeaf2bbeeaa5643d9620f5ea484430e.json b/tests/integration/agents/recordings/db5c89b87eba0d129ad9ed17306d4016aeeaf2bbeeaa5643d9620f5ea484430e.json index da06f3968..55e71cf27 100644 --- a/tests/integration/agents/recordings/db5c89b87eba0d129ad9ed17306d4016aeeaf2bbeeaa5643d9620f5ea484430e.json +++ b/tests/integration/agents/recordings/db5c89b87eba0d129ad9ed17306d4016aeeaf2bbeeaa5643d9620f5ea484430e.json @@ -71,7 +71,7 @@ "tool_calls": [ { "index": 0, - "id": "call_ur5tbdbt", + "id": "call_5qverjg6", "function": { "arguments": "{\"celcius\":true,\"liquid_name\":\"polyjuice\"}", "name": "get_boiling_point" @@ -121,5 +121,6 @@ } ], "is_streaming": true - } + }, + "id_normalization_mapping": {} } diff --git a/tests/integration/agents/recordings/ed76dd5fdf892c9cc959b2d301a256f81c43a906a0a56684ca97e848f8d6a94c.json b/tests/integration/agents/recordings/ed76dd5fdf892c9cc959b2d301a256f81c43a906a0a56684ca97e848f8d6a94c.json index cb2afc5ed..06d8a4305 100644 --- a/tests/integration/agents/recordings/ed76dd5fdf892c9cc959b2d301a256f81c43a906a0a56684ca97e848f8d6a94c.json +++ b/tests/integration/agents/recordings/ed76dd5fdf892c9cc959b2d301a256f81c43a906a0a56684ca97e848f8d6a94c.json @@ -54,5 +54,6 @@ } }, "is_streaming": false - } + }, + "id_normalization_mapping": {} } diff --git a/tests/integration/agents/recordings/f85c3c14185386eecd4939eeb6b3a3cee734d69beb7cd6d13a3d3c2c64eca734.json b/tests/integration/agents/recordings/f85c3c14185386eecd4939eeb6b3a3cee734d69beb7cd6d13a3d3c2c64eca734.json index 2e1e9f4e5..dbb70df6c 100644 --- a/tests/integration/agents/recordings/f85c3c14185386eecd4939eeb6b3a3cee734d69beb7cd6d13a3d3c2c64eca734.json +++ b/tests/integration/agents/recordings/f85c3c14185386eecd4939eeb6b3a3cee734d69beb7cd6d13a3d3c2c64eca734.json @@ -66,7 +66,7 @@ "tool_calls": [ { "index": 0, - "id": "call_rq1pcgq7", + "id": "call_z1rt0qb1", "function": { "arguments": "{\"celcius\":true,\"liquid_name\":\"polyjuice\"}", "name": "get_boiling_point" @@ -116,5 +116,6 @@ } ], "is_streaming": true - } + }, + "id_normalization_mapping": {} } diff --git a/tests/integration/agents/test_openai_responses.py b/tests/integration/agents/test_openai_responses.py index 675e2b904..d413d5201 100644 --- a/tests/integration/agents/test_openai_responses.py +++ b/tests/integration/agents/test_openai_responses.py @@ -466,3 +466,53 @@ def test_guardrails_with_tools(compat_client, text_model_id): # Response should be either a function call or a message output_type = response.output[0].type assert output_type in ["function_call", "message"] + + +def test_response_with_instructions(openai_client, client_with_models, text_model_id): + """Test instructions parameter in the responses object.""" + if isinstance(client_with_models, LlamaStackAsLibraryClient): + pytest.skip("OpenAI responses are not supported when testing with library client yet.") + + client = openai_client + + messages = [ + { + "role": "user", + "content": "What is the capital of France?", + } + ] + + # First create a response without instructions parameter + response_w_o_instructions = client.responses.create( + model=text_model_id, + input=messages, + stream=False, + ) + + # Verify we have None in the instructions field + assert response_w_o_instructions.instructions is None + + # Next create a response and pass instructions parameter + instructions = "You are a helpful assistant." + response_with_instructions = client.responses.create( + model=text_model_id, + instructions=instructions, + input=messages, + stream=False, + ) + + # Verify we have a valid instructions field + assert response_with_instructions.instructions == instructions + + # Finally test instructions parameter with a previous response id + instructions2 = "You are a helpful assistant and speak in pirate language." + response_with_instructions2 = client.responses.create( + model=text_model_id, + instructions=instructions2, + input=messages, + previous_response_id=response_with_instructions.id, + stream=False, + ) + + # Verify instructions from previous response was not carried over to the next response + assert response_with_instructions2.instructions == instructions2 diff --git a/tests/integration/common/recordings/models-64a2277c90f0f42576f60c1030e3a020403d34a95f56931b792d5939f4cebc57-abd54ea0.json b/tests/integration/common/recordings/models-64a2277c90f0f42576f60c1030e3a020403d34a95f56931b792d5939f4cebc57-abd54ea0.json new file mode 100644 index 000000000..77e244a01 --- /dev/null +++ b/tests/integration/common/recordings/models-64a2277c90f0f42576f60c1030e3a020403d34a95f56931b792d5939f4cebc57-abd54ea0.json @@ -0,0 +1,44 @@ +{ + "test_id": null, + "request": { + "method": "POST", + "url": "http://0.0.0.0:11434/v1/v1/models", + "headers": {}, + "body": {}, + "endpoint": "/v1/models", + "model": "" + }, + "response": { + "body": [ + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "llama-guard3:1b", + "created": 1753937098, + "object": "model", + "owned_by": "library" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "all-minilm:l6-v2", + "created": 1753936935, + "object": "model", + "owned_by": "library" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "llama3.2:3b-instruct-fp16", + "created": 1753936925, + "object": "model", + "owned_by": "library" + } + } + ], + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index 3137de0de..a258eb1a0 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -317,3 +317,72 @@ def pytest_ignore_collect(path: str, config: pytest.Config) -> bool: if p.is_relative_to(rp): return False return True + + +def get_vector_io_provider_ids(client): + """Get all available vector_io provider IDs.""" + providers = [p for p in client.providers.list() if p.api == "vector_io"] + return [p.provider_id for p in providers] + + +def vector_provider_wrapper(func): + """Decorator to run a test against all available vector_io providers.""" + import functools + import os + + @functools.wraps(func) + def wrapper(*args, **kwargs): + # Get the vector_io_provider_id from the test arguments + import inspect + + sig = inspect.signature(func) + bound_args = sig.bind(*args, **kwargs) + bound_args.apply_defaults() + + vector_io_provider_id = bound_args.arguments.get("vector_io_provider_id") + if not vector_io_provider_id: + pytest.skip("No vector_io_provider_id provided") + + # Get client_with_models to check available providers + client_with_models = bound_args.arguments.get("client_with_models") + if client_with_models: + available_providers = get_vector_io_provider_ids(client_with_models) + if vector_io_provider_id not in available_providers: + pytest.skip(f"Provider '{vector_io_provider_id}' not available. Available: {available_providers}") + + return func(*args, **kwargs) + + # For replay tests, only use providers that are available in ci-tests environment + if os.environ.get("LLAMA_STACK_TEST_INFERENCE_MODE") == "replay": + all_providers = ["faiss", "sqlite-vec"] + else: + # For live tests, try all providers (they'll skip if not available) + all_providers = [ + "faiss", + "sqlite-vec", + "milvus", + "chromadb", + "pgvector", + "weaviate", + "qdrant", + ] + + return pytest.mark.parametrize("vector_io_provider_id", all_providers)(wrapper) + + +@pytest.fixture +def vector_io_provider_id(request, client_with_models): + """Fixture that provides a specific vector_io provider ID, skipping if not available.""" + if hasattr(request, "param"): + requested_provider = request.param + available_providers = get_vector_io_provider_ids(client_with_models) + + if requested_provider not in available_providers: + pytest.skip(f"Provider '{requested_provider}' not available. Available: {available_providers}") + + return requested_provider + else: + provider_ids = get_vector_io_provider_ids(client_with_models) + if not provider_ids: + pytest.skip("No vector_io providers available") + return provider_ids[0] diff --git a/tests/integration/fixtures/common.py b/tests/integration/fixtures/common.py index 68a30fc69..ffd49033d 100644 --- a/tests/integration/fixtures/common.py +++ b/tests/integration/fixtures/common.py @@ -21,6 +21,7 @@ from llama_stack_client import LlamaStackClient from openai import OpenAI from llama_stack import LlamaStackAsLibraryClient +from llama_stack.core.datatypes import VectorStoresConfig from llama_stack.core.stack import run_config_from_adhoc_config_spec from llama_stack.env import get_env_or_fail @@ -236,9 +237,16 @@ def instantiate_llama_stack_client(session): if "=" in config: run_config = run_config_from_adhoc_config_spec(config) + + # --stack-config bypasses template so need this to set default embedding model + if "vector_io" in config and "inference" in config: + run_config.vector_stores = VectorStoresConfig( + embedding_model_id="inline::sentence-transformers/nomic-ai/nomic-embed-text-v1.5" + ) + run_config_file = tempfile.NamedTemporaryFile(delete=False, suffix=".yaml") with open(run_config_file.name, "w") as f: - yaml.dump(run_config.model_dump(), f) + yaml.dump(run_config.model_dump(mode="json"), f) config = run_config_file.name client = LlamaStackAsLibraryClient( diff --git a/tests/integration/providers/utils/sqlstore/test_authorized_sqlstore.py b/tests/integration/providers/utils/sqlstore/test_authorized_sqlstore.py index 98bef0f2c..ad9115756 100644 --- a/tests/integration/providers/utils/sqlstore/test_authorized_sqlstore.py +++ b/tests/integration/providers/utils/sqlstore/test_authorized_sqlstore.py @@ -12,9 +12,15 @@ import pytest from llama_stack.core.access_control.access_control import default_policy from llama_stack.core.datatypes import User +from llama_stack.core.storage.datatypes import SqlStoreReference from llama_stack.providers.utils.sqlstore.api import ColumnType from llama_stack.providers.utils.sqlstore.authorized_sqlstore import AuthorizedSqlStore -from llama_stack.providers.utils.sqlstore.sqlstore import PostgresSqlStoreConfig, SqliteSqlStoreConfig, sqlstore_impl +from llama_stack.providers.utils.sqlstore.sqlstore import ( + PostgresSqlStoreConfig, + SqliteSqlStoreConfig, + register_sqlstore_backends, + sqlstore_impl, +) def get_postgres_config(): @@ -55,8 +61,9 @@ def authorized_store(backend_config): config_func = backend_config config = config_func() - - base_sqlstore = sqlstore_impl(config) + backend_name = f"sql_{type(config).__name__.lower()}" + register_sqlstore_backends({backend_name: config}) + base_sqlstore = sqlstore_impl(SqlStoreReference(backend=backend_name, table_name="authorized_store")) authorized_store = AuthorizedSqlStore(base_sqlstore, default_policy()) yield authorized_store diff --git a/tests/integration/test_persistence_integration.py b/tests/integration/test_persistence_integration.py new file mode 100644 index 000000000..e9b80dc0c --- /dev/null +++ b/tests/integration/test_persistence_integration.py @@ -0,0 +1,71 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +import yaml + +from llama_stack.core.datatypes import StackRunConfig +from llama_stack.core.storage.datatypes import ( + PostgresKVStoreConfig, + PostgresSqlStoreConfig, + SqliteKVStoreConfig, + SqliteSqlStoreConfig, +) + + +def test_starter_distribution_config_loads_and_resolves(): + """Integration: Actual starter config should parse and have correct storage structure.""" + with open("llama_stack/distributions/starter/run.yaml") as f: + config_dict = yaml.safe_load(f) + + config = StackRunConfig(**config_dict) + + # Config should have named backends and explicit store references + assert config.storage is not None + assert "kv_default" in config.storage.backends + assert "sql_default" in config.storage.backends + assert isinstance(config.storage.backends["kv_default"], SqliteKVStoreConfig) + assert isinstance(config.storage.backends["sql_default"], SqliteSqlStoreConfig) + + stores = config.storage.stores + assert stores.metadata is not None + assert stores.metadata.backend == "kv_default" + assert stores.metadata.namespace == "registry" + + assert stores.inference is not None + assert stores.inference.backend == "sql_default" + assert stores.inference.table_name == "inference_store" + assert stores.inference.max_write_queue_size > 0 + assert stores.inference.num_writers > 0 + + assert stores.conversations is not None + assert stores.conversations.backend == "sql_default" + assert stores.conversations.table_name == "openai_conversations" + + +def test_postgres_demo_distribution_config_loads(): + """Integration: Postgres demo should use Postgres backend for all stores.""" + with open("llama_stack/distributions/postgres-demo/run.yaml") as f: + config_dict = yaml.safe_load(f) + + config = StackRunConfig(**config_dict) + + # Should have postgres backend + assert config.storage is not None + assert "kv_default" in config.storage.backends + assert "sql_default" in config.storage.backends + postgres_backend = config.storage.backends["sql_default"] + assert isinstance(postgres_backend, PostgresSqlStoreConfig) + assert postgres_backend.host == "${env.POSTGRES_HOST:=localhost}" + + kv_backend = config.storage.backends["kv_default"] + assert isinstance(kv_backend, PostgresKVStoreConfig) + + stores = config.storage.stores + # Stores target the Postgres backends explicitly + assert stores.metadata is not None + assert stores.metadata.backend == "kv_default" + assert stores.inference is not None + assert stores.inference.backend == "sql_default" diff --git a/tests/integration/vector_io/test_openai_vector_stores.py b/tests/integration/vector_io/test_openai_vector_stores.py index e21b233bc..626faf42d 100644 --- a/tests/integration/vector_io/test_openai_vector_stores.py +++ b/tests/integration/vector_io/test_openai_vector_stores.py @@ -8,14 +8,15 @@ import time from io import BytesIO import pytest -from llama_stack_client import BadRequestError, NotFoundError +from llama_stack_client import BadRequestError from openai import BadRequestError as OpenAIBadRequestError -from openai import NotFoundError as OpenAINotFoundError from llama_stack.apis.vector_io import Chunk from llama_stack.core.library_client import LlamaStackAsLibraryClient from llama_stack.log import get_logger +from ..conftest import vector_provider_wrapper + logger = get_logger(name=__name__, category="vector_io") @@ -133,8 +134,9 @@ def compat_client_with_empty_stores(compat_client): clear_files() +@vector_provider_wrapper def test_openai_create_vector_store( - compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension + compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension, vector_io_provider_id ): """Test creating a vector store using OpenAI API.""" skip_if_provider_doesnt_support_openai_vector_stores(client_with_models) @@ -146,6 +148,7 @@ def test_openai_create_vector_store( metadata={"purpose": "testing", "environment": "integration"}, extra_body={ "embedding_model": embedding_model_id, + "provider_id": vector_io_provider_id, }, ) @@ -159,14 +162,18 @@ def test_openai_create_vector_store( assert hasattr(vector_store, "created_at") -def test_openai_create_vector_store_default(compat_client_with_empty_stores, client_with_models): +@vector_provider_wrapper +def test_openai_create_vector_store_default(compat_client_with_empty_stores, client_with_models, vector_io_provider_id): skip_if_provider_doesnt_support_openai_vector_stores(client_with_models) - vector_store = compat_client_with_empty_stores.vector_stores.create() + vector_store = compat_client_with_empty_stores.vector_stores.create( + extra_body={"provider_id": vector_io_provider_id} + ) assert vector_store.id +@vector_provider_wrapper def test_openai_list_vector_stores( - compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension + compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension, vector_io_provider_id ): """Test listing vector stores using OpenAI API.""" skip_if_provider_doesnt_support_openai_vector_stores(client_with_models) @@ -179,6 +186,7 @@ def test_openai_list_vector_stores( metadata={"type": "test"}, extra_body={ "embedding_model": embedding_model_id, + "provider_id": vector_io_provider_id, }, ) store2 = client.vector_stores.create( @@ -186,6 +194,7 @@ def test_openai_list_vector_stores( metadata={"type": "test"}, extra_body={ "embedding_model": embedding_model_id, + "provider_id": vector_io_provider_id, }, ) @@ -206,8 +215,9 @@ def test_openai_list_vector_stores( assert len(limited_response.data) == 1 +@vector_provider_wrapper def test_openai_retrieve_vector_store( - compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension + compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension, vector_io_provider_id ): """Test retrieving a specific vector store using OpenAI API.""" skip_if_provider_doesnt_support_openai_vector_stores(client_with_models) @@ -220,6 +230,7 @@ def test_openai_retrieve_vector_store( metadata={"purpose": "retrieval_test"}, extra_body={ "embedding_model": embedding_model_id, + "provider_id": vector_io_provider_id, }, ) @@ -233,8 +244,9 @@ def test_openai_retrieve_vector_store( assert retrieved_store.object == "vector_store" +@vector_provider_wrapper def test_openai_update_vector_store( - compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension + compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension, vector_io_provider_id ): """Test modifying a vector store using OpenAI API.""" skip_if_provider_doesnt_support_openai_vector_stores(client_with_models) @@ -247,6 +259,7 @@ def test_openai_update_vector_store( metadata={"version": "1.0"}, extra_body={ "embedding_model": embedding_model_id, + "provider_id": vector_io_provider_id, }, ) time.sleep(1) @@ -264,8 +277,9 @@ def test_openai_update_vector_store( assert modified_store.last_active_at > created_store.last_active_at +@vector_provider_wrapper def test_openai_delete_vector_store( - compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension + compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension, vector_io_provider_id ): """Test deleting a vector store using OpenAI API.""" skip_if_provider_doesnt_support_openai_vector_stores(client_with_models) @@ -278,6 +292,7 @@ def test_openai_delete_vector_store( metadata={"purpose": "deletion_test"}, extra_body={ "embedding_model": embedding_model_id, + "provider_id": vector_io_provider_id, }, ) @@ -294,8 +309,9 @@ def test_openai_delete_vector_store( client.vector_stores.retrieve(vector_store_id=created_store.id) +@vector_provider_wrapper def test_openai_vector_store_search_empty( - compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension + compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension, vector_io_provider_id ): """Test searching an empty vector store using OpenAI API.""" skip_if_provider_doesnt_support_openai_vector_stores(client_with_models) @@ -308,6 +324,7 @@ def test_openai_vector_store_search_empty( metadata={"purpose": "search_testing"}, extra_body={ "embedding_model": embedding_model_id, + "provider_id": vector_io_provider_id, }, ) @@ -323,8 +340,14 @@ def test_openai_vector_store_search_empty( assert search_response.has_more is False +@vector_provider_wrapper def test_openai_vector_store_with_chunks( - compat_client_with_empty_stores, client_with_models, sample_chunks, embedding_model_id, embedding_dimension + compat_client_with_empty_stores, + client_with_models, + sample_chunks, + embedding_model_id, + embedding_dimension, + vector_io_provider_id, ): """Test vector store functionality with actual chunks using both OpenAI and native APIs.""" skip_if_provider_doesnt_support_openai_vector_stores(client_with_models) @@ -338,6 +361,7 @@ def test_openai_vector_store_with_chunks( metadata={"purpose": "chunks_testing"}, extra_body={ "embedding_model": embedding_model_id, + "provider_id": vector_io_provider_id, }, ) @@ -380,6 +404,7 @@ def test_openai_vector_store_with_chunks( ("What inspires neural networks?", "doc4", "ai"), ], ) +@vector_provider_wrapper def test_openai_vector_store_search_relevance( compat_client_with_empty_stores, client_with_models, @@ -387,6 +412,7 @@ def test_openai_vector_store_search_relevance( test_case, embedding_model_id, embedding_dimension, + vector_io_provider_id, ): """Test that OpenAI vector store search returns relevant results for different queries.""" skip_if_provider_doesnt_support_openai_vector_stores(client_with_models) @@ -402,6 +428,7 @@ def test_openai_vector_store_search_relevance( metadata={"purpose": "relevance_testing"}, extra_body={ "embedding_model": embedding_model_id, + "provider_id": vector_io_provider_id, }, ) @@ -430,8 +457,14 @@ def test_openai_vector_store_search_relevance( assert top_result.score > 0 +@vector_provider_wrapper def test_openai_vector_store_search_with_ranking_options( - compat_client_with_empty_stores, client_with_models, sample_chunks, embedding_model_id, embedding_dimension + compat_client_with_empty_stores, + client_with_models, + sample_chunks, + embedding_model_id, + embedding_dimension, + vector_io_provider_id, ): """Test OpenAI vector store search with ranking options.""" skip_if_provider_doesnt_support_openai_vector_stores(client_with_models) @@ -445,6 +478,7 @@ def test_openai_vector_store_search_with_ranking_options( metadata={"purpose": "ranking_testing"}, extra_body={ "embedding_model": embedding_model_id, + "provider_id": vector_io_provider_id, }, ) @@ -483,8 +517,14 @@ def test_openai_vector_store_search_with_ranking_options( assert result.score >= threshold +@vector_provider_wrapper def test_openai_vector_store_search_with_high_score_filter( - compat_client_with_empty_stores, client_with_models, sample_chunks, embedding_model_id, embedding_dimension + compat_client_with_empty_stores, + client_with_models, + sample_chunks, + embedding_model_id, + embedding_dimension, + vector_io_provider_id, ): """Test that searching with text very similar to a document and high score threshold returns only that document.""" skip_if_provider_doesnt_support_openai_vector_stores(client_with_models) @@ -498,6 +538,7 @@ def test_openai_vector_store_search_with_high_score_filter( metadata={"purpose": "high_score_filtering"}, extra_body={ "embedding_model": embedding_model_id, + "provider_id": vector_io_provider_id, }, ) @@ -542,8 +583,14 @@ def test_openai_vector_store_search_with_high_score_filter( assert "python" in top_content.lower() or "programming" in top_content.lower() +@vector_provider_wrapper def test_openai_vector_store_search_with_max_num_results( - compat_client_with_empty_stores, client_with_models, sample_chunks, embedding_model_id, embedding_dimension + compat_client_with_empty_stores, + client_with_models, + sample_chunks, + embedding_model_id, + embedding_dimension, + vector_io_provider_id, ): """Test OpenAI vector store search with max_num_results.""" skip_if_provider_doesnt_support_openai_vector_stores(client_with_models) @@ -557,6 +604,7 @@ def test_openai_vector_store_search_with_max_num_results( metadata={"purpose": "max_num_results_testing"}, extra_body={ "embedding_model": embedding_model_id, + "provider_id": vector_io_provider_id, }, ) @@ -577,8 +625,9 @@ def test_openai_vector_store_search_with_max_num_results( assert len(search_response.data) == 2 +@vector_provider_wrapper def test_openai_vector_store_attach_file( - compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension + compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension, vector_io_provider_id ): """Test OpenAI vector store attach file.""" skip_if_provider_doesnt_support_openai_vector_stores(client_with_models) @@ -591,6 +640,7 @@ def test_openai_vector_store_attach_file( name="test_store", extra_body={ "embedding_model": embedding_model_id, + "provider_id": vector_io_provider_id, }, ) @@ -637,8 +687,9 @@ def test_openai_vector_store_attach_file( assert "foobazbar" in top_content.lower() +@vector_provider_wrapper def test_openai_vector_store_attach_files_on_creation( - compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension + compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension, vector_io_provider_id ): """Test OpenAI vector store attach files on creation.""" skip_if_provider_doesnt_support_openai_vector_stores(client_with_models) @@ -668,6 +719,7 @@ def test_openai_vector_store_attach_files_on_creation( file_ids=file_ids, extra_body={ "embedding_model": embedding_model_id, + "provider_id": vector_io_provider_id, }, ) @@ -700,8 +752,9 @@ def test_openai_vector_store_attach_files_on_creation( assert updated_vector_store.file_counts.failed == 0 +@vector_provider_wrapper def test_openai_vector_store_list_files( - compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension + compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension, vector_io_provider_id ): """Test OpenAI vector store list files.""" skip_if_provider_doesnt_support_openai_vector_stores(client_with_models) @@ -714,6 +767,7 @@ def test_openai_vector_store_list_files( name="test_store", extra_body={ "embedding_model": embedding_model_id, + "provider_id": vector_io_provider_id, }, ) @@ -773,8 +827,9 @@ def test_openai_vector_store_list_files( assert updated_vector_store.file_counts.in_progress == 0 +@vector_provider_wrapper def test_openai_vector_store_list_files_invalid_vector_store( - compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension + compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension, vector_io_provider_id ): """Test OpenAI vector store list files with invalid vector store ID.""" skip_if_provider_doesnt_support_openai_vector_stores(client_with_models) @@ -783,14 +838,15 @@ def test_openai_vector_store_list_files_invalid_vector_store( if isinstance(compat_client, LlamaStackAsLibraryClient): errors = ValueError else: - errors = (NotFoundError, OpenAINotFoundError) + errors = (BadRequestError, OpenAIBadRequestError) with pytest.raises(errors): compat_client.vector_stores.files.list(vector_store_id="abc123") +@vector_provider_wrapper def test_openai_vector_store_retrieve_file_contents( - compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension + compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension, vector_io_provider_id ): """Test OpenAI vector store retrieve file contents.""" skip_if_provider_doesnt_support_openai_vector_stores(client_with_models) @@ -803,6 +859,7 @@ def test_openai_vector_store_retrieve_file_contents( name="test_store", extra_body={ "embedding_model": embedding_model_id, + "provider_id": vector_io_provider_id, }, ) @@ -848,8 +905,9 @@ def test_openai_vector_store_retrieve_file_contents( assert file_contents.attributes == attributes +@vector_provider_wrapper def test_openai_vector_store_delete_file( - compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension + compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension, vector_io_provider_id ): """Test OpenAI vector store delete file.""" skip_if_provider_doesnt_support_openai_vector_stores(client_with_models) @@ -862,6 +920,7 @@ def test_openai_vector_store_delete_file( name="test_store", extra_body={ "embedding_model": embedding_model_id, + "provider_id": vector_io_provider_id, }, ) @@ -912,8 +971,9 @@ def test_openai_vector_store_delete_file( assert updated_vector_store.file_counts.in_progress == 0 +@vector_provider_wrapper def test_openai_vector_store_delete_file_removes_from_vector_store( - compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension + compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension, vector_io_provider_id ): """Test OpenAI vector store delete file removes from vector store.""" skip_if_provider_doesnt_support_openai_vector_stores(client_with_models) @@ -926,6 +986,7 @@ def test_openai_vector_store_delete_file_removes_from_vector_store( name="test_store", extra_body={ "embedding_model": embedding_model_id, + "provider_id": vector_io_provider_id, }, ) @@ -962,8 +1023,9 @@ def test_openai_vector_store_delete_file_removes_from_vector_store( assert not search_response.data +@vector_provider_wrapper def test_openai_vector_store_update_file( - compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension + compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension, vector_io_provider_id ): """Test OpenAI vector store update file.""" skip_if_provider_doesnt_support_openai_vector_stores(client_with_models) @@ -976,6 +1038,7 @@ def test_openai_vector_store_update_file( name="test_store", extra_body={ "embedding_model": embedding_model_id, + "provider_id": vector_io_provider_id, }, ) @@ -1017,8 +1080,9 @@ def test_openai_vector_store_update_file( assert retrieved_file.attributes["foo"] == "baz" +@vector_provider_wrapper def test_create_vector_store_files_duplicate_vector_store_name( - compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension + compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension, vector_io_provider_id ): """ This test confirms that client.vector_stores.create() creates a unique ID @@ -1044,6 +1108,7 @@ def test_create_vector_store_files_duplicate_vector_store_name( name="test_store_with_files", extra_body={ "embedding_model": embedding_model_id, + "provider_id": vector_io_provider_id, }, ) assert vector_store.file_counts.completed == 0 @@ -1056,6 +1121,7 @@ def test_create_vector_store_files_duplicate_vector_store_name( name="test_store_with_files", extra_body={ "embedding_model": embedding_model_id, + "provider_id": vector_io_provider_id, }, ) @@ -1086,8 +1152,15 @@ def test_create_vector_store_files_duplicate_vector_store_name( @pytest.mark.parametrize("search_mode", ["vector", "keyword", "hybrid"]) +@vector_provider_wrapper def test_openai_vector_store_search_modes( - llama_stack_client, client_with_models, sample_chunks, search_mode, embedding_model_id, embedding_dimension + llama_stack_client, + client_with_models, + sample_chunks, + search_mode, + embedding_model_id, + embedding_dimension, + vector_io_provider_id, ): skip_if_provider_doesnt_support_openai_vector_stores(client_with_models) skip_if_provider_doesnt_support_openai_vector_stores_search(client_with_models, search_mode) @@ -1097,6 +1170,7 @@ def test_openai_vector_store_search_modes( metadata={"purpose": "search_mode_testing"}, extra_body={ "embedding_model": embedding_model_id, + "provider_id": vector_io_provider_id, }, ) @@ -1115,8 +1189,9 @@ def test_openai_vector_store_search_modes( assert search_response is not None +@vector_provider_wrapper def test_openai_vector_store_file_batch_create_and_retrieve( - compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension + compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension, vector_io_provider_id ): """Test creating and retrieving a vector store file batch.""" skip_if_provider_doesnt_support_openai_vector_stores(client_with_models) @@ -1128,6 +1203,7 @@ def test_openai_vector_store_file_batch_create_and_retrieve( name="batch_test_store", extra_body={ "embedding_model": embedding_model_id, + "provider_id": vector_io_provider_id, }, ) @@ -1178,8 +1254,9 @@ def test_openai_vector_store_file_batch_create_and_retrieve( assert retrieved_batch.status == "completed" # Should be completed after processing +@vector_provider_wrapper def test_openai_vector_store_file_batch_list_files( - compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension + compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension, vector_io_provider_id ): """Test listing files in a vector store file batch.""" skip_if_provider_doesnt_support_openai_vector_stores(client_with_models) @@ -1191,6 +1268,7 @@ def test_openai_vector_store_file_batch_list_files( name="batch_list_test_store", extra_body={ "embedding_model": embedding_model_id, + "provider_id": vector_io_provider_id, }, ) @@ -1271,8 +1349,9 @@ def test_openai_vector_store_file_batch_list_files( assert first_page_ids.isdisjoint(second_page_ids) +@vector_provider_wrapper def test_openai_vector_store_file_batch_cancel( - compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension + compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension, vector_io_provider_id ): """Test cancelling a vector store file batch.""" skip_if_provider_doesnt_support_openai_vector_stores(client_with_models) @@ -1284,6 +1363,7 @@ def test_openai_vector_store_file_batch_cancel( name="batch_cancel_test_store", extra_body={ "embedding_model": embedding_model_id, + "provider_id": vector_io_provider_id, }, ) @@ -1326,8 +1406,9 @@ def test_openai_vector_store_file_batch_cancel( assert final_batch.status in ["completed", "cancelled"] +@vector_provider_wrapper def test_openai_vector_store_file_batch_retrieve_contents( - compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension + compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension, vector_io_provider_id ): """Test retrieving file contents after file batch processing.""" skip_if_provider_doesnt_support_openai_vector_stores(client_with_models) @@ -1339,6 +1420,7 @@ def test_openai_vector_store_file_batch_retrieve_contents( name="batch_contents_test_store", extra_body={ "embedding_model": embedding_model_id, + "provider_id": vector_io_provider_id, }, ) @@ -1399,8 +1481,9 @@ def test_openai_vector_store_file_batch_retrieve_contents( assert file_data[i][1].decode("utf-8") in content_text +@vector_provider_wrapper def test_openai_vector_store_file_batch_error_handling( - compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension + compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension, vector_io_provider_id ): """Test error handling for file batch operations.""" skip_if_provider_doesnt_support_openai_vector_stores(client_with_models) @@ -1412,6 +1495,7 @@ def test_openai_vector_store_file_batch_error_handling( name="batch_error_test_store", extra_body={ "embedding_model": embedding_model_id, + "provider_id": vector_io_provider_id, }, ) @@ -1443,11 +1527,11 @@ def test_openai_vector_store_file_batch_error_handling( batch_id="non_existent_batch_id", ) - # Test operations on non-existent vector store (returns NotFoundError) + # Test operations on non-existent vector store (returns BadRequestError) if isinstance(compat_client, LlamaStackAsLibraryClient): vector_store_errors = ValueError else: - vector_store_errors = (NotFoundError, OpenAINotFoundError) + vector_store_errors = (BadRequestError, OpenAIBadRequestError) with pytest.raises(vector_store_errors): # Should raise an error for non-existent vector store compat_client.vector_stores.file_batches.create( @@ -1456,8 +1540,9 @@ def test_openai_vector_store_file_batch_error_handling( ) +@vector_provider_wrapper def test_openai_vector_store_embedding_config_from_metadata( - compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension + compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension, vector_io_provider_id ): """Test that embedding configuration works from metadata source.""" skip_if_provider_doesnt_support_openai_vector_stores(client_with_models) @@ -1471,6 +1556,9 @@ def test_openai_vector_store_embedding_config_from_metadata( "embedding_dimension": str(embedding_dimension), "test_source": "metadata", }, + extra_body={ + "provider_id": vector_io_provider_id, + }, ) assert vector_store_metadata is not None @@ -1489,6 +1577,7 @@ def test_openai_vector_store_embedding_config_from_metadata( extra_body={ "embedding_model": embedding_model_id, "embedding_dimension": int(embedding_dimension), # Ensure same type/value + "provider_id": vector_io_provider_id, }, ) diff --git a/tests/integration/vector_io/test_vector_io.py b/tests/integration/vector_io/test_vector_io.py index 653299338..e5ca7a0db 100644 --- a/tests/integration/vector_io/test_vector_io.py +++ b/tests/integration/vector_io/test_vector_io.py @@ -8,6 +8,8 @@ import pytest from llama_stack.apis.vector_io import Chunk +from ..conftest import vector_provider_wrapper + @pytest.fixture(scope="session") def sample_chunks(): @@ -46,12 +48,13 @@ def client_with_empty_registry(client_with_models): clear_registry() -def test_vector_db_retrieve(client_with_empty_registry, embedding_model_id, embedding_dimension): +@vector_provider_wrapper +def test_vector_db_retrieve(client_with_empty_registry, embedding_model_id, embedding_dimension, vector_io_provider_id): vector_db_name = "test_vector_db" create_response = client_with_empty_registry.vector_stores.create( name=vector_db_name, extra_body={ - "embedding_model": embedding_model_id, + "provider_id": vector_io_provider_id, }, ) @@ -65,12 +68,13 @@ def test_vector_db_retrieve(client_with_empty_registry, embedding_model_id, embe assert response.id.startswith("vs_") -def test_vector_db_register(client_with_empty_registry, embedding_model_id, embedding_dimension): +@vector_provider_wrapper +def test_vector_db_register(client_with_empty_registry, embedding_model_id, embedding_dimension, vector_io_provider_id): vector_db_name = "test_vector_db" response = client_with_empty_registry.vector_stores.create( name=vector_db_name, extra_body={ - "embedding_model": embedding_model_id, + "provider_id": vector_io_provider_id, }, ) @@ -100,12 +104,15 @@ def test_vector_db_register(client_with_empty_registry, embedding_model_id, embe ("How does machine learning improve over time?", "doc2"), ], ) -def test_insert_chunks(client_with_empty_registry, embedding_model_id, embedding_dimension, sample_chunks, test_case): +@vector_provider_wrapper +def test_insert_chunks( + client_with_empty_registry, embedding_model_id, embedding_dimension, sample_chunks, test_case, vector_io_provider_id +): vector_db_name = "test_vector_db" create_response = client_with_empty_registry.vector_stores.create( name=vector_db_name, extra_body={ - "embedding_model": embedding_model_id, + "provider_id": vector_io_provider_id, }, ) @@ -135,7 +142,10 @@ def test_insert_chunks(client_with_empty_registry, embedding_model_id, embedding assert top_match.metadata["document_id"] == expected_doc_id, f"Query '{query}' should match {expected_doc_id}" -def test_insert_chunks_with_precomputed_embeddings(client_with_empty_registry, embedding_model_id, embedding_dimension): +@vector_provider_wrapper +def test_insert_chunks_with_precomputed_embeddings( + client_with_empty_registry, embedding_model_id, embedding_dimension, vector_io_provider_id +): vector_io_provider_params_dict = { "inline::milvus": {"score_threshold": -1.0}, "inline::qdrant": {"score_threshold": -1.0}, @@ -145,7 +155,7 @@ def test_insert_chunks_with_precomputed_embeddings(client_with_empty_registry, e register_response = client_with_empty_registry.vector_stores.create( name=vector_db_name, extra_body={ - "embedding_model": embedding_model_id, + "provider_id": vector_io_provider_id, }, ) @@ -181,8 +191,9 @@ def test_insert_chunks_with_precomputed_embeddings(client_with_empty_registry, e # expect this test to fail +@vector_provider_wrapper def test_query_returns_valid_object_when_identical_to_embedding_in_vdb( - client_with_empty_registry, embedding_model_id, embedding_dimension + client_with_empty_registry, embedding_model_id, embedding_dimension, vector_io_provider_id ): vector_io_provider_params_dict = { "inline::milvus": {"score_threshold": 0.0}, @@ -194,6 +205,7 @@ def test_query_returns_valid_object_when_identical_to_embedding_in_vdb( name=vector_db_name, extra_body={ "embedding_model": embedding_model_id, + "provider_id": vector_io_provider_id, }, ) @@ -226,33 +238,44 @@ def test_query_returns_valid_object_when_identical_to_embedding_in_vdb( assert response.chunks[0].metadata["source"] == "precomputed" -def test_auto_extract_embedding_dimension(client_with_empty_registry, embedding_model_id): +@vector_provider_wrapper +def test_auto_extract_embedding_dimension( + client_with_empty_registry, embedding_model_id, embedding_dimension, vector_io_provider_id +): + # This test specifically tests embedding model override, so we keep embedding_model vs = client_with_empty_registry.vector_stores.create( - name="test_auto_extract", extra_body={"embedding_model": embedding_model_id} + name="test_auto_extract", + extra_body={"embedding_model": embedding_model_id, "provider_id": vector_io_provider_id}, ) assert vs.id is not None -def test_provider_auto_selection_single_provider(client_with_empty_registry, embedding_model_id): +@vector_provider_wrapper +def test_provider_auto_selection_single_provider( + client_with_empty_registry, embedding_model_id, embedding_dimension, vector_io_provider_id +): providers = [p for p in client_with_empty_registry.providers.list() if p.api == "vector_io"] if len(providers) != 1: pytest.skip(f"Test requires exactly one vector_io provider, found {len(providers)}") - vs = client_with_empty_registry.vector_stores.create( - name="test_auto_provider", extra_body={"embedding_model": embedding_model_id} - ) + # Test that when only one provider is available, it's auto-selected (no provider_id needed) + vs = client_with_empty_registry.vector_stores.create(name="test_auto_provider") assert vs.id is not None -def test_provider_id_override(client_with_empty_registry, embedding_model_id): +@vector_provider_wrapper +def test_provider_id_override( + client_with_empty_registry, embedding_model_id, embedding_dimension, vector_io_provider_id +): providers = [p for p in client_with_empty_registry.providers.list() if p.api == "vector_io"] if len(providers) != 1: pytest.skip(f"Test requires exactly one vector_io provider, found {len(providers)}") provider_id = providers[0].provider_id + # Test explicit provider_id specification (using default embedding model) vs = client_with_empty_registry.vector_stores.create( - name="test_provider_override", extra_body={"embedding_model": embedding_model_id, "provider_id": provider_id} + name="test_provider_override", extra_body={"provider_id": provider_id} ) assert vs.id is not None assert vs.metadata.get("provider_id") == provider_id diff --git a/tests/unit/cli/test_stack_config.py b/tests/unit/cli/test_stack_config.py index daaf229e5..7b9f3ca0c 100644 --- a/tests/unit/cli/test_stack_config.py +++ b/tests/unit/cli/test_stack_config.py @@ -23,6 +23,27 @@ def config_with_image_name_int(): image_name: 1234 apis_to_serve: [] built_at: {datetime.now().isoformat()} + storage: + backends: + kv_default: + type: kv_sqlite + db_path: /tmp/test_kv.db + sql_default: + type: sql_sqlite + db_path: /tmp/test_sql.db + stores: + metadata: + backend: kv_default + namespace: metadata + inference: + backend: sql_default + table_name: inference + conversations: + backend: sql_default + table_name: conversations + responses: + backend: sql_default + table_name: responses providers: inference: - provider_id: provider1 @@ -54,6 +75,27 @@ def up_to_date_config(): image_name: foo apis_to_serve: [] built_at: {datetime.now().isoformat()} + storage: + backends: + kv_default: + type: kv_sqlite + db_path: /tmp/test_kv.db + sql_default: + type: sql_sqlite + db_path: /tmp/test_sql.db + stores: + metadata: + backend: kv_default + namespace: metadata + inference: + backend: sql_default + table_name: inference + conversations: + backend: sql_default + table_name: conversations + responses: + backend: sql_default + table_name: responses providers: inference: - provider_id: provider1 diff --git a/tests/unit/conversations/test_conversations.py b/tests/unit/conversations/test_conversations.py index 65c3e2333..ff6dd243d 100644 --- a/tests/unit/conversations/test_conversations.py +++ b/tests/unit/conversations/test_conversations.py @@ -20,7 +20,14 @@ from llama_stack.core.conversations.conversations import ( ConversationServiceConfig, ConversationServiceImpl, ) -from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig +from llama_stack.core.datatypes import StackRunConfig +from llama_stack.core.storage.datatypes import ( + ServerStoresConfig, + SqliteSqlStoreConfig, + SqlStoreReference, + StorageConfig, +) +from llama_stack.providers.utils.sqlstore.sqlstore import register_sqlstore_backends @pytest.fixture @@ -28,7 +35,18 @@ async def service(): with tempfile.TemporaryDirectory() as tmpdir: db_path = Path(tmpdir) / "test_conversations.db" - config = ConversationServiceConfig(conversations_store=SqliteSqlStoreConfig(db_path=str(db_path)), policy=[]) + storage = StorageConfig( + backends={ + "sql_test": SqliteSqlStoreConfig(db_path=str(db_path)), + }, + stores=ServerStoresConfig( + conversations=SqlStoreReference(backend="sql_test", table_name="openai_conversations"), + ), + ) + register_sqlstore_backends({"sql_test": storage.backends["sql_test"]}) + run_config = StackRunConfig(image_name="test", apis=[], providers={}, storage=storage) + + config = ConversationServiceConfig(run_config=run_config, policy=[]) service = ConversationServiceImpl(config, {}) await service.initialize() yield service @@ -121,9 +139,18 @@ async def test_policy_configuration(): AccessRule(forbid=Scope(principal="test_user", actions=[Action.CREATE, Action.READ], resource="*")) ] - config = ConversationServiceConfig( - conversations_store=SqliteSqlStoreConfig(db_path=str(db_path)), policy=restrictive_policy + storage = StorageConfig( + backends={ + "sql_test": SqliteSqlStoreConfig(db_path=str(db_path)), + }, + stores=ServerStoresConfig( + conversations=SqlStoreReference(backend="sql_test", table_name="openai_conversations"), + ), ) + register_sqlstore_backends({"sql_test": storage.backends["sql_test"]}) + run_config = StackRunConfig(image_name="test", apis=[], providers={}, storage=storage) + + config = ConversationServiceConfig(run_config=run_config, policy=restrictive_policy) service = ConversationServiceImpl(config, {}) await service.initialize() diff --git a/tests/unit/core/test_stack_validation.py b/tests/unit/core/test_stack_validation.py index 5fc27e199..fa5348d1c 100644 --- a/tests/unit/core/test_stack_validation.py +++ b/tests/unit/core/test_stack_validation.py @@ -4,90 +4,64 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -""" -Unit tests for Stack validation functions. -""" +"""Unit tests for Stack validation functions.""" from unittest.mock import AsyncMock import pytest -from llama_stack.apis.models import Model, ModelType -from llama_stack.core.stack import validate_default_embedding_model +from llama_stack.apis.models import ListModelsResponse, Model, ModelType +from llama_stack.core.datatypes import QualifiedModel, StackRunConfig, StorageConfig, VectorStoresConfig +from llama_stack.core.stack import validate_vector_stores_config from llama_stack.providers.datatypes import Api -class TestStackValidation: - """Test Stack validation functions.""" +class TestVectorStoresValidation: + async def test_validate_missing_model(self): + """Test validation fails when model not found.""" + run_config = StackRunConfig( + image_name="test", + providers={}, + storage=StorageConfig(backends={}, stores={}), + vector_stores=VectorStoresConfig( + default_provider_id="faiss", + default_embedding_model=QualifiedModel( + provider_id="p", + model_id="missing", + ), + ), + ) + mock_models = AsyncMock() + mock_models.list_models.return_value = ListModelsResponse(data=[]) - @pytest.mark.parametrize( - "models,should_raise", - [ - ([], False), # No models - ( - [ - Model( - identifier="emb1", - model_type=ModelType.embedding, - metadata={"default_configured": True}, - provider_id="p", - provider_resource_id="emb1", - ) - ], - False, - ), # Single default - ( - [ - Model( - identifier="emb1", - model_type=ModelType.embedding, - metadata={"default_configured": True}, - provider_id="p", - provider_resource_id="emb1", - ), - Model( - identifier="emb2", - model_type=ModelType.embedding, - metadata={"default_configured": True}, - provider_id="p", - provider_resource_id="emb2", - ), - ], - True, - ), # Multiple defaults - ( - [ - Model( - identifier="emb1", - model_type=ModelType.embedding, - metadata={"default_configured": True}, - provider_id="p", - provider_resource_id="emb1", - ), - Model( - identifier="llm1", - model_type=ModelType.llm, - metadata={"default_configured": True}, - provider_id="p", - provider_resource_id="llm1", - ), - ], - False, - ), # Ignores non-embedding - ], - ) - async def test_validate_default_embedding_model(self, models, should_raise): - """Test validation with various model configurations.""" - mock_models_impl = AsyncMock() - mock_models_impl.list_models.return_value = models - impls = {Api.models: mock_models_impl} + with pytest.raises(ValueError, match="not found"): + await validate_vector_stores_config(run_config.vector_stores, {Api.models: mock_models}) - if should_raise: - with pytest.raises(ValueError, match="Multiple embedding models marked as default_configured=True"): - await validate_default_embedding_model(impls) - else: - await validate_default_embedding_model(impls) + async def test_validate_success(self): + """Test validation passes with valid model.""" + run_config = StackRunConfig( + image_name="test", + providers={}, + storage=StorageConfig(backends={}, stores={}), + vector_stores=VectorStoresConfig( + default_provider_id="faiss", + default_embedding_model=QualifiedModel( + provider_id="p", + model_id="valid", + ), + ), + ) + mock_models = AsyncMock() + mock_models.list_models.return_value = ListModelsResponse( + data=[ + Model( + identifier="p/valid", # Must match provider_id/model_id format + model_type=ModelType.embedding, + metadata={"embedding_dimension": 768}, + provider_id="p", + provider_resource_id="valid", + ) + ] + ) - async def test_validate_default_embedding_model_no_models_api(self): - """Test validation when models API is not available.""" - await validate_default_embedding_model({}) + await validate_vector_stores_config(run_config.vector_stores, {Api.models: mock_models}) diff --git a/tests/unit/core/test_storage_references.py b/tests/unit/core/test_storage_references.py new file mode 100644 index 000000000..7bceba74d --- /dev/null +++ b/tests/unit/core/test_storage_references.py @@ -0,0 +1,84 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +"""Unit tests for storage backend/reference validation.""" + +import pytest +from pydantic import ValidationError + +from llama_stack.core.datatypes import ( + LLAMA_STACK_RUN_CONFIG_VERSION, + StackRunConfig, +) +from llama_stack.core.storage.datatypes import ( + InferenceStoreReference, + KVStoreReference, + ServerStoresConfig, + SqliteKVStoreConfig, + SqliteSqlStoreConfig, + SqlStoreReference, + StorageConfig, +) + + +def _base_run_config(**overrides): + metadata_reference = overrides.pop( + "metadata_reference", + KVStoreReference(backend="kv_default", namespace="registry"), + ) + inference_reference = overrides.pop( + "inference_reference", + InferenceStoreReference(backend="sql_default", table_name="inference"), + ) + conversations_reference = overrides.pop( + "conversations_reference", + SqlStoreReference(backend="sql_default", table_name="conversations"), + ) + storage = overrides.pop( + "storage", + StorageConfig( + backends={ + "kv_default": SqliteKVStoreConfig(db_path="/tmp/kv.db"), + "sql_default": SqliteSqlStoreConfig(db_path="/tmp/sql.db"), + }, + stores=ServerStoresConfig( + metadata=metadata_reference, + inference=inference_reference, + conversations=conversations_reference, + ), + ), + ) + return StackRunConfig( + version=LLAMA_STACK_RUN_CONFIG_VERSION, + image_name="test-distro", + apis=[], + providers={}, + storage=storage, + **overrides, + ) + + +def test_references_require_known_backend(): + with pytest.raises(ValidationError, match="unknown backend 'missing'"): + _base_run_config(metadata_reference=KVStoreReference(backend="missing", namespace="registry")) + + +def test_references_must_match_backend_family(): + with pytest.raises(ValidationError, match="kv_.* is required"): + _base_run_config(metadata_reference=KVStoreReference(backend="sql_default", namespace="registry")) + + with pytest.raises(ValidationError, match="sql_.* is required"): + _base_run_config( + inference_reference=InferenceStoreReference(backend="kv_default", table_name="inference"), + ) + + +def test_valid_configuration_passes_validation(): + config = _base_run_config() + stores = config.storage.stores + assert stores.metadata is not None and stores.metadata.backend == "kv_default" + assert stores.inference is not None and stores.inference.backend == "sql_default" + assert stores.conversations is not None and stores.conversations.backend == "sql_default" diff --git a/tests/unit/distribution/test_distribution.py b/tests/unit/distribution/test_distribution.py index 08a376008..3b0643a13 100644 --- a/tests/unit/distribution/test_distribution.py +++ b/tests/unit/distribution/test_distribution.py @@ -13,6 +13,15 @@ from pydantic import BaseModel, Field, ValidationError from llama_stack.core.datatypes import Api, Provider, StackRunConfig from llama_stack.core.distribution import INTERNAL_APIS, get_provider_registry, providable_apis +from llama_stack.core.storage.datatypes import ( + InferenceStoreReference, + KVStoreReference, + ServerStoresConfig, + SqliteKVStoreConfig, + SqliteSqlStoreConfig, + SqlStoreReference, + StorageConfig, +) from llama_stack.providers.datatypes import ProviderSpec @@ -29,6 +38,32 @@ class SampleConfig(BaseModel): } +def _default_storage() -> StorageConfig: + return StorageConfig( + backends={ + "kv_default": SqliteKVStoreConfig(db_path=":memory:"), + "sql_default": SqliteSqlStoreConfig(db_path=":memory:"), + }, + stores=ServerStoresConfig( + metadata=KVStoreReference(backend="kv_default", namespace="registry"), + inference=InferenceStoreReference(backend="sql_default", table_name="inference_store"), + conversations=SqlStoreReference(backend="sql_default", table_name="conversations"), + ), + ) + + +def make_stack_config(**overrides) -> StackRunConfig: + storage = overrides.pop("storage", _default_storage()) + defaults = dict( + image_name="test_image", + apis=[], + providers={}, + storage=storage, + ) + defaults.update(overrides) + return StackRunConfig(**defaults) + + @pytest.fixture def mock_providers(): """Mock the available_providers function to return test providers.""" @@ -47,8 +82,8 @@ def mock_providers(): @pytest.fixture def base_config(tmp_path): """Create a base StackRunConfig with common settings.""" - return StackRunConfig( - image_name="test_image", + return make_stack_config( + apis=["inference"], providers={ "inference": [ Provider( @@ -222,8 +257,8 @@ class TestProviderRegistry: def test_missing_directory(self, mock_providers): """Test handling of missing external providers directory.""" - config = StackRunConfig( - image_name="test_image", + config = make_stack_config( + apis=["inference"], providers={ "inference": [ Provider( @@ -278,7 +313,6 @@ pip_packages: """Test loading an external provider from a module (success path).""" from types import SimpleNamespace - from llama_stack.core.datatypes import Provider, StackRunConfig from llama_stack.providers.datatypes import Api, ProviderSpec # Simulate a provider module with get_provider_spec @@ -293,7 +327,7 @@ pip_packages: import_module_side_effect = make_import_module_side_effect(external_module=fake_module) with patch("importlib.import_module", side_effect=import_module_side_effect) as mock_import: - config = StackRunConfig( + config = make_stack_config( image_name="test_image", providers={ "inference": [ @@ -317,12 +351,11 @@ pip_packages: def test_external_provider_from_module_not_found(self, mock_providers): """Test handling ModuleNotFoundError for missing provider module.""" - from llama_stack.core.datatypes import Provider, StackRunConfig import_module_side_effect = make_import_module_side_effect(raise_for_external=True) with patch("importlib.import_module", side_effect=import_module_side_effect): - config = StackRunConfig( + config = make_stack_config( image_name="test_image", providers={ "inference": [ @@ -341,12 +374,11 @@ pip_packages: def test_external_provider_from_module_missing_get_provider_spec(self, mock_providers): """Test handling missing get_provider_spec in provider module (should raise ValueError).""" - from llama_stack.core.datatypes import Provider, StackRunConfig import_module_side_effect = make_import_module_side_effect(missing_get_provider_spec=True) with patch("importlib.import_module", side_effect=import_module_side_effect): - config = StackRunConfig( + config = make_stack_config( image_name="test_image", providers={ "inference": [ @@ -399,13 +431,12 @@ class TestGetExternalProvidersFromModule: def test_stackrunconfig_provider_without_module(self, mock_providers): """Test that providers without module attribute are skipped.""" - from llama_stack.core.datatypes import Provider, StackRunConfig from llama_stack.core.distribution import get_external_providers_from_module import_module_side_effect = make_import_module_side_effect() with patch("importlib.import_module", side_effect=import_module_side_effect): - config = StackRunConfig( + config = make_stack_config( image_name="test_image", providers={ "inference": [ @@ -426,7 +457,6 @@ class TestGetExternalProvidersFromModule: """Test provider with module containing version spec (e.g., package==1.0.0).""" from types import SimpleNamespace - from llama_stack.core.datatypes import Provider, StackRunConfig from llama_stack.core.distribution import get_external_providers_from_module from llama_stack.providers.datatypes import ProviderSpec @@ -444,7 +474,7 @@ class TestGetExternalProvidersFromModule: raise ModuleNotFoundError(name) with patch("importlib.import_module", side_effect=import_side_effect): - config = StackRunConfig( + config = make_stack_config( image_name="test_image", providers={ "inference": [ @@ -564,7 +594,6 @@ class TestGetExternalProvidersFromModule: """Test when get_provider_spec returns a list of specs.""" from types import SimpleNamespace - from llama_stack.core.datatypes import Provider, StackRunConfig from llama_stack.core.distribution import get_external_providers_from_module from llama_stack.providers.datatypes import ProviderSpec @@ -589,7 +618,7 @@ class TestGetExternalProvidersFromModule: raise ModuleNotFoundError(name) with patch("importlib.import_module", side_effect=import_side_effect): - config = StackRunConfig( + config = make_stack_config( image_name="test_image", providers={ "inference": [ @@ -613,7 +642,6 @@ class TestGetExternalProvidersFromModule: """Test that list return filters specs by provider_type.""" from types import SimpleNamespace - from llama_stack.core.datatypes import Provider, StackRunConfig from llama_stack.core.distribution import get_external_providers_from_module from llama_stack.providers.datatypes import ProviderSpec @@ -638,7 +666,7 @@ class TestGetExternalProvidersFromModule: raise ModuleNotFoundError(name) with patch("importlib.import_module", side_effect=import_side_effect): - config = StackRunConfig( + config = make_stack_config( image_name="test_image", providers={ "inference": [ @@ -662,7 +690,6 @@ class TestGetExternalProvidersFromModule: """Test that list return adds multiple different provider_types when config requests them.""" from types import SimpleNamespace - from llama_stack.core.datatypes import Provider, StackRunConfig from llama_stack.core.distribution import get_external_providers_from_module from llama_stack.providers.datatypes import ProviderSpec @@ -688,7 +715,7 @@ class TestGetExternalProvidersFromModule: raise ModuleNotFoundError(name) with patch("importlib.import_module", side_effect=import_side_effect): - config = StackRunConfig( + config = make_stack_config( image_name="test_image", providers={ "inference": [ @@ -718,7 +745,6 @@ class TestGetExternalProvidersFromModule: def test_module_not_found_raises_value_error(self, mock_providers): """Test that ModuleNotFoundError raises ValueError with helpful message.""" - from llama_stack.core.datatypes import Provider, StackRunConfig from llama_stack.core.distribution import get_external_providers_from_module def import_side_effect(name): @@ -727,7 +753,7 @@ class TestGetExternalProvidersFromModule: raise ModuleNotFoundError(name) with patch("importlib.import_module", side_effect=import_side_effect): - config = StackRunConfig( + config = make_stack_config( image_name="test_image", providers={ "inference": [ @@ -751,7 +777,6 @@ class TestGetExternalProvidersFromModule: """Test that generic exceptions are properly raised.""" from types import SimpleNamespace - from llama_stack.core.datatypes import Provider, StackRunConfig from llama_stack.core.distribution import get_external_providers_from_module def bad_spec(): @@ -765,7 +790,7 @@ class TestGetExternalProvidersFromModule: raise ModuleNotFoundError(name) with patch("importlib.import_module", side_effect=import_side_effect): - config = StackRunConfig( + config = make_stack_config( image_name="test_image", providers={ "inference": [ @@ -787,10 +812,9 @@ class TestGetExternalProvidersFromModule: def test_empty_provider_list(self, mock_providers): """Test with empty provider list.""" - from llama_stack.core.datatypes import StackRunConfig from llama_stack.core.distribution import get_external_providers_from_module - config = StackRunConfig( + config = make_stack_config( image_name="test_image", providers={}, ) @@ -805,7 +829,6 @@ class TestGetExternalProvidersFromModule: """Test multiple APIs with providers.""" from types import SimpleNamespace - from llama_stack.core.datatypes import Provider, StackRunConfig from llama_stack.core.distribution import get_external_providers_from_module from llama_stack.providers.datatypes import ProviderSpec @@ -830,7 +853,7 @@ class TestGetExternalProvidersFromModule: raise ModuleNotFoundError(name) with patch("importlib.import_module", side_effect=import_side_effect): - config = StackRunConfig( + config = make_stack_config( image_name="test_image", providers={ "inference": [ diff --git a/tests/unit/files/test_files.py b/tests/unit/files/test_files.py index e14e033b9..426e2cf64 100644 --- a/tests/unit/files/test_files.py +++ b/tests/unit/files/test_files.py @@ -11,11 +11,12 @@ from llama_stack.apis.common.errors import ResourceNotFoundError from llama_stack.apis.common.responses import Order from llama_stack.apis.files import OpenAIFilePurpose from llama_stack.core.access_control.access_control import default_policy +from llama_stack.core.storage.datatypes import SqliteSqlStoreConfig, SqlStoreReference from llama_stack.providers.inline.files.localfs import ( LocalfsFilesImpl, LocalfsFilesImplConfig, ) -from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig +from llama_stack.providers.utils.sqlstore.sqlstore import register_sqlstore_backends class MockUploadFile: @@ -36,8 +37,11 @@ async def files_provider(tmp_path): storage_dir = tmp_path / "files" db_path = tmp_path / "files_metadata.db" + backend_name = "sql_localfs_test" + register_sqlstore_backends({backend_name: SqliteSqlStoreConfig(db_path=db_path.as_posix())}) config = LocalfsFilesImplConfig( - storage_dir=storage_dir.as_posix(), metadata_store=SqliteSqlStoreConfig(db_path=db_path.as_posix()) + storage_dir=storage_dir.as_posix(), + metadata_store=SqlStoreReference(backend=backend_name, table_name="files_metadata"), ) provider = LocalfsFilesImpl(config, default_policy()) diff --git a/tests/unit/prompts/prompts/conftest.py b/tests/unit/prompts/prompts/conftest.py index b2c619e49..fe30e1a77 100644 --- a/tests/unit/prompts/prompts/conftest.py +++ b/tests/unit/prompts/prompts/conftest.py @@ -9,7 +9,16 @@ import random import pytest from llama_stack.core.prompts.prompts import PromptServiceConfig, PromptServiceImpl -from llama_stack.providers.utils.kvstore.config import SqliteKVStoreConfig +from llama_stack.core.storage.datatypes import ( + InferenceStoreReference, + KVStoreReference, + ServerStoresConfig, + SqliteKVStoreConfig, + SqliteSqlStoreConfig, + SqlStoreReference, + StorageConfig, +) +from llama_stack.providers.utils.kvstore import kvstore_impl, register_kvstore_backends @pytest.fixture @@ -19,12 +28,28 @@ async def temp_prompt_store(tmp_path_factory): db_path = str(temp_dir / f"{unique_id}.db") from llama_stack.core.datatypes import StackRunConfig - from llama_stack.providers.utils.kvstore import kvstore_impl - mock_run_config = StackRunConfig(image_name="test-distribution", apis=[], providers={}) + storage = StorageConfig( + backends={ + "kv_test": SqliteKVStoreConfig(db_path=db_path), + "sql_test": SqliteSqlStoreConfig(db_path=str(temp_dir / f"{unique_id}_sql.db")), + }, + stores=ServerStoresConfig( + metadata=KVStoreReference(backend="kv_test", namespace="registry"), + inference=InferenceStoreReference(backend="sql_test", table_name="inference"), + conversations=SqlStoreReference(backend="sql_test", table_name="conversations"), + ), + ) + mock_run_config = StackRunConfig( + image_name="test-distribution", + apis=[], + providers={}, + storage=storage, + ) config = PromptServiceConfig(run_config=mock_run_config) store = PromptServiceImpl(config, deps={}) - store.kvstore = await kvstore_impl(SqliteKVStoreConfig(db_path=db_path)) + register_kvstore_backends({"kv_test": storage.backends["kv_test"]}) + store.kvstore = await kvstore_impl(KVStoreReference(backend="kv_test", namespace="prompts")) yield store diff --git a/tests/unit/providers/agent/test_meta_reference_agent.py b/tests/unit/providers/agent/test_meta_reference_agent.py index cfb3e1327..dfd9b6d52 100644 --- a/tests/unit/providers/agent/test_meta_reference_agent.py +++ b/tests/unit/providers/agent/test_meta_reference_agent.py @@ -26,6 +26,20 @@ from llama_stack.providers.inline.agents.meta_reference.config import MetaRefere from llama_stack.providers.inline.agents.meta_reference.persistence import AgentInfo +@pytest.fixture(autouse=True) +def setup_backends(tmp_path): + """Register KV and SQL store backends for testing.""" + from llama_stack.core.storage.datatypes import SqliteKVStoreConfig, SqliteSqlStoreConfig + from llama_stack.providers.utils.kvstore.kvstore import register_kvstore_backends + from llama_stack.providers.utils.sqlstore.sqlstore import register_sqlstore_backends + + kv_path = str(tmp_path / "test_kv.db") + sql_path = str(tmp_path / "test_sql.db") + + register_kvstore_backends({"kv_default": SqliteKVStoreConfig(db_path=kv_path)}) + register_sqlstore_backends({"sql_default": SqliteSqlStoreConfig(db_path=sql_path)}) + + @pytest.fixture def mock_apis(): return { @@ -40,15 +54,20 @@ def mock_apis(): @pytest.fixture def config(tmp_path): + from llama_stack.core.storage.datatypes import KVStoreReference, ResponsesStoreReference + from llama_stack.providers.inline.agents.meta_reference.config import AgentPersistenceConfig + return MetaReferenceAgentsImplConfig( - persistence_store={ - "type": "sqlite", - "db_path": str(tmp_path / "test.db"), - }, - responses_store={ - "type": "sqlite", - "db_path": str(tmp_path / "test.db"), - }, + persistence=AgentPersistenceConfig( + agent_state=KVStoreReference( + backend="kv_default", + namespace="agents", + ), + responses=ResponsesStoreReference( + backend="sql_default", + table_name="responses", + ), + ) ) diff --git a/tests/unit/providers/agents/meta_reference/test_openai_responses.py b/tests/unit/providers/agents/meta_reference/test_openai_responses.py index e93668a62..f31ec0c28 100644 --- a/tests/unit/providers/agents/meta_reference/test_openai_responses.py +++ b/tests/unit/providers/agents/meta_reference/test_openai_responses.py @@ -42,7 +42,7 @@ from llama_stack.apis.inference import ( ) from llama_stack.apis.tools.tools import ListToolDefsResponse, ToolDef, ToolGroups, ToolInvocationResult, ToolRuntime from llama_stack.core.access_control.access_control import default_policy -from llama_stack.core.datatypes import ResponsesStoreConfig +from llama_stack.core.storage.datatypes import ResponsesStoreReference, SqliteSqlStoreConfig from llama_stack.providers.inline.agents.meta_reference.responses.openai_responses import ( OpenAIResponsesImpl, ) @@ -50,7 +50,7 @@ from llama_stack.providers.utils.responses.responses_store import ( ResponsesStore, _OpenAIResponseObjectWithInputAndMessages, ) -from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig +from llama_stack.providers.utils.sqlstore.sqlstore import register_sqlstore_backends from tests.unit.providers.agents.meta_reference.fixtures import load_chat_completion_fixture @@ -814,6 +814,69 @@ async def test_create_openai_response_with_instructions_and_previous_response( assert sent_messages[3].content == "Which is the largest?" +async def test_create_openai_response_with_previous_response_instructions( + openai_responses_impl, mock_responses_store, mock_inference_api +): + """Test prepending instructions and previous response with instructions.""" + + input_item_message = OpenAIResponseMessage( + id="123", + content="Name some towns in Ireland", + role="user", + ) + response_output_message = OpenAIResponseMessage( + id="123", + content="Galway, Longford, Sligo", + status="completed", + role="assistant", + ) + response = _OpenAIResponseObjectWithInputAndMessages( + created_at=1, + id="resp_123", + model="fake_model", + output=[response_output_message], + status="completed", + text=OpenAIResponseText(format=OpenAIResponseTextFormat(type="text")), + input=[input_item_message], + messages=[ + OpenAIUserMessageParam(content="Name some towns in Ireland"), + OpenAIAssistantMessageParam(content="Galway, Longford, Sligo"), + ], + instructions="You are a helpful assistant.", + ) + mock_responses_store.get_response_object.return_value = response + + model = "meta-llama/Llama-3.1-8B-Instruct" + instructions = "You are a geography expert. Provide concise answers." + + mock_inference_api.openai_chat_completion.return_value = fake_stream() + + # Execute + await openai_responses_impl.create_openai_response( + input="Which is the largest?", model=model, instructions=instructions, previous_response_id="123" + ) + + # Verify + mock_inference_api.openai_chat_completion.assert_called_once() + call_args = mock_inference_api.openai_chat_completion.call_args + params = call_args.args[0] + sent_messages = params.messages + + # Check that instructions were prepended as a system message + # and that the previous response instructions were not carried over + assert len(sent_messages) == 4, sent_messages + assert sent_messages[0].role == "system" + assert sent_messages[0].content == instructions + + # Check the rest of the messages were converted correctly + assert sent_messages[1].role == "user" + assert sent_messages[1].content == "Name some towns in Ireland" + assert sent_messages[2].role == "assistant" + assert sent_messages[2].content == "Galway, Longford, Sligo" + assert sent_messages[3].role == "user" + assert sent_messages[3].content == "Which is the largest?" + + async def test_list_openai_response_input_items_delegation(openai_responses_impl, mock_responses_store): """Test that list_openai_response_input_items properly delegates to responses_store with correct parameters.""" # Setup @@ -854,8 +917,10 @@ async def test_responses_store_list_input_items_logic(): # Create mock store and response store mock_sql_store = AsyncMock() + backend_name = "sql_responses_test" + register_sqlstore_backends({backend_name: SqliteSqlStoreConfig(db_path="mock_db_path")}) responses_store = ResponsesStore( - ResponsesStoreConfig(sql_store_config=SqliteSqlStoreConfig(db_path="mock_db_path")), policy=default_policy() + ResponsesStoreReference(backend=backend_name, table_name="responses"), policy=default_policy() ) responses_store.sql_store = mock_sql_store diff --git a/tests/unit/providers/batches/conftest.py b/tests/unit/providers/batches/conftest.py index df37141b5..d161bf976 100644 --- a/tests/unit/providers/batches/conftest.py +++ b/tests/unit/providers/batches/conftest.py @@ -12,10 +12,10 @@ from unittest.mock import AsyncMock import pytest +from llama_stack.core.storage.datatypes import KVStoreReference, SqliteKVStoreConfig from llama_stack.providers.inline.batches.reference.batches import ReferenceBatchesImpl from llama_stack.providers.inline.batches.reference.config import ReferenceBatchesImplConfig -from llama_stack.providers.utils.kvstore import kvstore_impl -from llama_stack.providers.utils.kvstore.config import SqliteKVStoreConfig +from llama_stack.providers.utils.kvstore import kvstore_impl, register_kvstore_backends @pytest.fixture @@ -23,8 +23,10 @@ async def provider(): """Create a test provider instance with temporary database.""" with tempfile.TemporaryDirectory() as tmpdir: db_path = Path(tmpdir) / "test_batches.db" + backend_name = "kv_batches_test" kvstore_config = SqliteKVStoreConfig(db_path=str(db_path)) - config = ReferenceBatchesImplConfig(kvstore=kvstore_config) + register_kvstore_backends({backend_name: kvstore_config}) + config = ReferenceBatchesImplConfig(kvstore=KVStoreReference(backend=backend_name, namespace="batches")) # Create kvstore and mock APIs kvstore = await kvstore_impl(config.kvstore) diff --git a/tests/unit/providers/files/conftest.py b/tests/unit/providers/files/conftest.py index 46282e3dc..c64ecc3a3 100644 --- a/tests/unit/providers/files/conftest.py +++ b/tests/unit/providers/files/conftest.py @@ -8,8 +8,9 @@ import boto3 import pytest from moto import mock_aws +from llama_stack.core.storage.datatypes import SqliteSqlStoreConfig, SqlStoreReference from llama_stack.providers.remote.files.s3 import S3FilesImplConfig, get_adapter_impl -from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig +from llama_stack.providers.utils.sqlstore.sqlstore import register_sqlstore_backends class MockUploadFile: @@ -38,11 +39,13 @@ def sample_text_file2(): def s3_config(tmp_path): db_path = tmp_path / "s3_files_metadata.db" + backend_name = f"sql_s3_{tmp_path.name}" + register_sqlstore_backends({backend_name: SqliteSqlStoreConfig(db_path=db_path.as_posix())}) return S3FilesImplConfig( bucket_name=f"test-bucket-{tmp_path.name}", region="not-a-region", auto_create_bucket=True, - metadata_store=SqliteSqlStoreConfig(db_path=db_path.as_posix()), + metadata_store=SqlStoreReference(backend=backend_name, table_name="s3_files_metadata"), ) diff --git a/tests/unit/providers/vector_io/conftest.py b/tests/unit/providers/vector_io/conftest.py index 8e5c85cf1..c78596018 100644 --- a/tests/unit/providers/vector_io/conftest.py +++ b/tests/unit/providers/vector_io/conftest.py @@ -12,13 +12,14 @@ import pytest from llama_stack.apis.vector_dbs import VectorDB from llama_stack.apis.vector_io import Chunk, ChunkMetadata, QueryChunksResponse +from llama_stack.core.storage.datatypes import KVStoreReference, SqliteKVStoreConfig from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig from llama_stack.providers.inline.vector_io.faiss.faiss import FaissIndex, FaissVectorIOAdapter from llama_stack.providers.inline.vector_io.sqlite_vec import SQLiteVectorIOConfig from llama_stack.providers.inline.vector_io.sqlite_vec.sqlite_vec import SQLiteVecIndex, SQLiteVecVectorIOAdapter from llama_stack.providers.remote.vector_io.pgvector.config import PGVectorVectorIOConfig from llama_stack.providers.remote.vector_io.pgvector.pgvector import PGVectorIndex, PGVectorVectorIOAdapter -from llama_stack.providers.utils.kvstore.config import SqliteKVStoreConfig +from llama_stack.providers.utils.kvstore import register_kvstore_backends EMBEDDING_DIMENSION = 768 COLLECTION_PREFIX = "test_collection" @@ -112,8 +113,9 @@ async def unique_kvstore_config(tmp_path_factory): unique_id = f"test_kv_{np.random.randint(1e6)}" temp_dir = tmp_path_factory.getbasetemp() db_path = str(temp_dir / f"{unique_id}.db") - - return SqliteKVStoreConfig(db_path=db_path) + backend_name = f"kv_vector_{unique_id}" + register_kvstore_backends({backend_name: SqliteKVStoreConfig(db_path=db_path)}) + return KVStoreReference(backend=backend_name, namespace=f"vector_io::{unique_id}") @pytest.fixture(scope="session") @@ -138,13 +140,12 @@ async def sqlite_vec_vec_index(embedding_dimension, tmp_path_factory): async def sqlite_vec_adapter(sqlite_vec_db_path, unique_kvstore_config, mock_inference_api, embedding_dimension): config = SQLiteVectorIOConfig( db_path=sqlite_vec_db_path, - kvstore=unique_kvstore_config, + persistence=unique_kvstore_config, ) adapter = SQLiteVecVectorIOAdapter( config=config, inference_api=mock_inference_api, files_api=None, - models_api=None, ) collection_id = f"sqlite_test_collection_{np.random.randint(1e6)}" await adapter.initialize() @@ -177,13 +178,12 @@ async def faiss_vec_index(embedding_dimension): @pytest.fixture async def faiss_vec_adapter(unique_kvstore_config, mock_inference_api, embedding_dimension): config = FaissVectorIOConfig( - kvstore=unique_kvstore_config, + persistence=unique_kvstore_config, ) adapter = FaissVectorIOAdapter( config=config, inference_api=mock_inference_api, files_api=None, - models_api=None, ) await adapter.initialize() await adapter.register_vector_db( @@ -253,7 +253,7 @@ async def pgvector_vec_adapter(unique_kvstore_config, mock_inference_api, embedd db="test_db", user="test_user", password="test_password", - kvstore=unique_kvstore_config, + persistence=unique_kvstore_config, ) adapter = PGVectorVectorIOAdapter(config, mock_inference_api, None) diff --git a/tests/unit/providers/vector_io/test_faiss.py b/tests/unit/providers/vector_io/test_faiss.py index 76969b711..fa5c5f56b 100644 --- a/tests/unit/providers/vector_io/test_faiss.py +++ b/tests/unit/providers/vector_io/test_faiss.py @@ -11,7 +11,6 @@ import numpy as np import pytest from llama_stack.apis.files import Files -from llama_stack.apis.models import Models from llama_stack.apis.vector_dbs import VectorDB from llama_stack.apis.vector_io import Chunk, QueryChunksResponse from llama_stack.providers.datatypes import HealthStatus @@ -76,12 +75,6 @@ def mock_files_api(): return mock_api -@pytest.fixture -def mock_models_api(): - mock_api = MagicMock(spec=Models) - return mock_api - - @pytest.fixture def faiss_config(): config = MagicMock(spec=FaissVectorIOConfig) @@ -117,7 +110,7 @@ async def test_faiss_query_vector_returns_infinity_when_query_and_embedding_are_ assert response.chunks[1] == sample_chunks[1] -async def test_health_success(mock_models_api): +async def test_health_success(): """Test that the health check returns OK status when faiss is working correctly.""" # Create a fresh instance of FaissVectorIOAdapter for testing config = MagicMock() @@ -126,9 +119,7 @@ async def test_health_success(mock_models_api): with patch("llama_stack.providers.inline.vector_io.faiss.faiss.faiss.IndexFlatL2") as mock_index_flat: mock_index_flat.return_value = MagicMock() - adapter = FaissVectorIOAdapter( - config=config, inference_api=inference_api, models_api=mock_models_api, files_api=files_api - ) + adapter = FaissVectorIOAdapter(config=config, inference_api=inference_api, files_api=files_api) # Calling the health method directly response = await adapter.health() @@ -142,7 +133,7 @@ async def test_health_success(mock_models_api): mock_index_flat.assert_called_once_with(128) # VECTOR_DIMENSION is 128 -async def test_health_failure(mock_models_api): +async def test_health_failure(): """Test that the health check returns ERROR status when faiss encounters an error.""" # Create a fresh instance of FaissVectorIOAdapter for testing config = MagicMock() @@ -152,9 +143,7 @@ async def test_health_failure(mock_models_api): with patch("llama_stack.providers.inline.vector_io.faiss.faiss.faiss.IndexFlatL2") as mock_index_flat: mock_index_flat.side_effect = Exception("Test error") - adapter = FaissVectorIOAdapter( - config=config, inference_api=inference_api, models_api=mock_models_api, files_api=files_api - ) + adapter = FaissVectorIOAdapter(config=config, inference_api=inference_api, files_api=files_api) # Calling the health method directly response = await adapter.health() diff --git a/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py b/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py index 32d59c91b..ad55b9336 100644 --- a/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py +++ b/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py @@ -6,13 +6,12 @@ import json import time -from unittest.mock import AsyncMock, Mock, patch +from unittest.mock import AsyncMock, patch import numpy as np import pytest from llama_stack.apis.common.errors import VectorStoreNotFoundError -from llama_stack.apis.models import Model, ModelType from llama_stack.apis.vector_dbs import VectorDB from llama_stack.apis.vector_io import ( Chunk, @@ -996,96 +995,6 @@ async def test_max_concurrent_files_per_batch(vector_io_adapter): assert batch.file_counts.in_progress == 8 -async def test_get_default_embedding_model_success(vector_io_adapter): - """Test successful default embedding model detection.""" - # Mock models API with a default model - mock_models_api = Mock() - mock_models_api.list_models = AsyncMock( - return_value=Mock( - data=[ - Model( - identifier="nomic-embed-text-v1.5", - model_type=ModelType.embedding, - provider_id="test-provider", - metadata={ - "embedding_dimension": 768, - "default_configured": True, - }, - ) - ] - ) - ) - - vector_io_adapter.models_api = mock_models_api - result = await vector_io_adapter._get_default_embedding_model_and_dimension() - - assert result is not None - model_id, dimension = result - assert model_id == "nomic-embed-text-v1.5" - assert dimension == 768 - - -async def test_get_default_embedding_model_multiple_defaults_error(vector_io_adapter): - """Test error when multiple models are marked as default.""" - mock_models_api = Mock() - mock_models_api.list_models = AsyncMock( - return_value=Mock( - data=[ - Model( - identifier="model1", - model_type=ModelType.embedding, - provider_id="test-provider", - metadata={"embedding_dimension": 768, "default_configured": True}, - ), - Model( - identifier="model2", - model_type=ModelType.embedding, - provider_id="test-provider", - metadata={"embedding_dimension": 512, "default_configured": True}, - ), - ] - ) - ) - - vector_io_adapter.models_api = mock_models_api - - with pytest.raises(ValueError, match="Multiple embedding models marked as default_configured=True"): - await vector_io_adapter._get_default_embedding_model_and_dimension() - - -async def test_openai_create_vector_store_uses_default_model(vector_io_adapter): - """Test that vector store creation uses default embedding model when none specified.""" - # Mock models API and dependencies - mock_models_api = Mock() - mock_models_api.list_models = AsyncMock( - return_value=Mock( - data=[ - Model( - identifier="default-model", - model_type=ModelType.embedding, - provider_id="test-provider", - metadata={"embedding_dimension": 512, "default_configured": True}, - ) - ] - ) - ) - - vector_io_adapter.models_api = mock_models_api - vector_io_adapter.register_vector_db = AsyncMock() - vector_io_adapter.__provider_id__ = "test-provider" - - # Create vector store without specifying embedding model - params = OpenAICreateVectorStoreRequestWithExtraBody(name="test-store") - result = await vector_io_adapter.openai_create_vector_store(params) - - # Verify the vector store was created with default model - assert result.name == "test-store" - vector_io_adapter.register_vector_db.assert_called_once() - call_args = vector_io_adapter.register_vector_db.call_args[0][0] - assert call_args.embedding_model == "default-model" - assert call_args.embedding_dimension == 512 - - async def test_embedding_config_from_metadata(vector_io_adapter): """Test that embedding configuration is correctly extracted from metadata.""" @@ -1253,5 +1162,5 @@ async def test_embedding_config_required_model_missing(vector_io_adapter): # Test with no embedding model provided params = OpenAICreateVectorStoreRequestWithExtraBody(name="test_store", metadata={}) - with pytest.raises(ValueError, match="embedding_model is required in extra_body when creating a vector store"): + with pytest.raises(ValueError, match="embedding_model is required"): await vector_io_adapter.openai_create_vector_store(params) diff --git a/tests/unit/registry/test_registry.py b/tests/unit/registry/test_registry.py index e49c9dc77..95022ad33 100644 --- a/tests/unit/registry/test_registry.py +++ b/tests/unit/registry/test_registry.py @@ -10,13 +10,13 @@ import pytest from llama_stack.apis.inference import Model from llama_stack.apis.vector_dbs import VectorDB from llama_stack.core.datatypes import VectorDBWithOwner +from llama_stack.core.storage.datatypes import KVStoreReference, SqliteKVStoreConfig from llama_stack.core.store.registry import ( KEY_FORMAT, CachedDiskDistributionRegistry, DiskDistributionRegistry, ) -from llama_stack.providers.utils.kvstore import kvstore_impl -from llama_stack.providers.utils.kvstore.config import SqliteKVStoreConfig +from llama_stack.providers.utils.kvstore import kvstore_impl, register_kvstore_backends @pytest.fixture @@ -72,7 +72,11 @@ async def test_cached_registry_initialization(sqlite_kvstore, sample_vector_db, # Test cached version loads from disk db_path = sqlite_kvstore.db_path - cached_registry = CachedDiskDistributionRegistry(await kvstore_impl(SqliteKVStoreConfig(db_path=db_path))) + backend_name = "kv_cached_test" + register_kvstore_backends({backend_name: SqliteKVStoreConfig(db_path=db_path)}) + cached_registry = CachedDiskDistributionRegistry( + await kvstore_impl(KVStoreReference(backend=backend_name, namespace="registry")) + ) await cached_registry.initialize() result_vector_db = await cached_registry.get("vector_db", "test_vector_db") @@ -101,7 +105,11 @@ async def test_cached_registry_updates(cached_disk_dist_registry): # Verify persisted to disk db_path = cached_disk_dist_registry.kvstore.db_path - new_registry = DiskDistributionRegistry(await kvstore_impl(SqliteKVStoreConfig(db_path=db_path))) + backend_name = "kv_cached_new" + register_kvstore_backends({backend_name: SqliteKVStoreConfig(db_path=db_path)}) + new_registry = DiskDistributionRegistry( + await kvstore_impl(KVStoreReference(backend=backend_name, namespace="registry")) + ) await new_registry.initialize() result_vector_db = await new_registry.get("vector_db", "test_vector_db_2") assert result_vector_db is not None diff --git a/tests/unit/server/test_auth.py b/tests/unit/server/test_auth.py index 04ae89db8..75cbf518b 100644 --- a/tests/unit/server/test_auth.py +++ b/tests/unit/server/test_auth.py @@ -516,6 +516,82 @@ def test_get_attributes_from_claims(): assert set(attributes["teams"]) == {"my-team", "group1", "group2"} assert attributes["namespaces"] == ["my-tenant"] + # Test nested claims with dot notation (e.g., Keycloak resource_access structure) + claims = { + "sub": "user123", + "resource_access": {"llamastack": {"roles": ["inference_max", "admin"]}, "other-client": {"roles": ["viewer"]}}, + "realm_access": {"roles": ["offline_access", "uma_authorization"]}, + } + attributes = get_attributes_from_claims( + claims, {"resource_access.llamastack.roles": "roles", "realm_access.roles": "realm_roles"} + ) + assert set(attributes["roles"]) == {"inference_max", "admin"} + assert set(attributes["realm_roles"]) == {"offline_access", "uma_authorization"} + + # Test that dot notation takes precedence over literal keys with dots + claims = { + "my.dotted.key": "literal-value", + "my": {"dotted": {"key": "nested-value"}}, + } + attributes = get_attributes_from_claims(claims, {"my.dotted.key": "test"}) + assert attributes["test"] == ["nested-value"] + + # Test that literal key works when nested traversal doesn't exist + claims = { + "my.dotted.key": "literal-value", + } + attributes = get_attributes_from_claims(claims, {"my.dotted.key": "test"}) + assert attributes["test"] == ["literal-value"] + + # Test missing nested paths are handled gracefully + claims = { + "sub": "user123", + "resource_access": {"other-client": {"roles": ["viewer"]}}, + } + attributes = get_attributes_from_claims( + claims, + { + "resource_access.llamastack.roles": "roles", # Missing nested path + "resource_access.missing.key": "missing_attr", # Missing nested path + "completely.missing.path": "another_missing", # Completely missing + "sub": "username", # Existing path + }, + ) + # Only the existing claim should be in attributes + assert attributes["username"] == ["user123"] + assert "roles" not in attributes + assert "missing_attr" not in attributes + assert "another_missing" not in attributes + + # Test mixture of flat and nested claims paths + claims = { + "sub": "user456", + "flat_key": "flat-value", + "scope": "read write admin", + "resource_access": {"app1": {"roles": ["role1", "role2"]}, "app2": {"roles": ["role3"]}}, + "groups": ["group1", "group2"], + "metadata": {"tenant": "tenant1", "region": "us-west"}, + } + attributes = get_attributes_from_claims( + claims, + { + "sub": "user_id", # Flat string + "scope": "permissions", # Flat string with spaces + "groups": "teams", # Flat list + "resource_access.app1.roles": "app1_roles", # Nested list + "resource_access.app2.roles": "app2_roles", # Nested list + "metadata.tenant": "tenant", # Nested string + "metadata.region": "region", # Nested string + }, + ) + assert attributes["user_id"] == ["user456"] + assert set(attributes["permissions"]) == {"read", "write", "admin"} + assert set(attributes["teams"]) == {"group1", "group2"} + assert set(attributes["app1_roles"]) == {"role1", "role2"} + assert attributes["app2_roles"] == ["role3"] + assert attributes["tenant"] == ["tenant1"] + assert attributes["region"] == ["us-west"] + # TODO: add more tests for oauth2 token provider diff --git a/tests/unit/server/test_quota.py b/tests/unit/server/test_quota.py index 85acbc66a..16b1772ce 100644 --- a/tests/unit/server/test_quota.py +++ b/tests/unit/server/test_quota.py @@ -4,6 +4,8 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. +from uuid import uuid4 + import pytest from fastapi import FastAPI, Request from fastapi.testclient import TestClient @@ -11,7 +13,8 @@ from starlette.middleware.base import BaseHTTPMiddleware from llama_stack.core.datatypes import QuotaConfig, QuotaPeriod from llama_stack.core.server.quota import QuotaMiddleware -from llama_stack.providers.utils.kvstore.config import SqliteKVStoreConfig +from llama_stack.core.storage.datatypes import KVStoreReference, SqliteKVStoreConfig +from llama_stack.providers.utils.kvstore import register_kvstore_backends class InjectClientIDMiddleware(BaseHTTPMiddleware): @@ -29,8 +32,10 @@ class InjectClientIDMiddleware(BaseHTTPMiddleware): def build_quota_config(db_path) -> QuotaConfig: + backend_name = f"kv_quota_{uuid4().hex}" + register_kvstore_backends({backend_name: SqliteKVStoreConfig(db_path=str(db_path))}) return QuotaConfig( - kvstore=SqliteKVStoreConfig(db_path=str(db_path)), + kvstore=KVStoreReference(backend=backend_name, namespace="quota"), anonymous_max_requests=1, authenticated_max_requests=2, period=QuotaPeriod.DAY, diff --git a/tests/unit/server/test_resolver.py b/tests/unit/server/test_resolver.py index 1ee1b2f47..b44f12f7e 100644 --- a/tests/unit/server/test_resolver.py +++ b/tests/unit/server/test_resolver.py @@ -12,15 +12,22 @@ from unittest.mock import AsyncMock, MagicMock from pydantic import BaseModel, Field from llama_stack.apis.inference import Inference -from llama_stack.core.datatypes import ( - Api, - Provider, - StackRunConfig, -) +from llama_stack.core.datatypes import Api, Provider, StackRunConfig from llama_stack.core.resolver import resolve_impls from llama_stack.core.routers.inference import InferenceRouter from llama_stack.core.routing_tables.models import ModelsRoutingTable +from llama_stack.core.storage.datatypes import ( + InferenceStoreReference, + KVStoreReference, + ServerStoresConfig, + SqliteKVStoreConfig, + SqliteSqlStoreConfig, + SqlStoreReference, + StorageConfig, +) from llama_stack.providers.datatypes import InlineProviderSpec, ProviderSpec +from llama_stack.providers.utils.kvstore import register_kvstore_backends +from llama_stack.providers.utils.sqlstore.sqlstore import register_sqlstore_backends def add_protocol_methods(cls: type, protocol: type[Protocol]) -> None: @@ -65,6 +72,35 @@ class SampleImpl: pass +def make_run_config(**overrides) -> StackRunConfig: + storage = overrides.pop( + "storage", + StorageConfig( + backends={ + "kv_default": SqliteKVStoreConfig(db_path=":memory:"), + "sql_default": SqliteSqlStoreConfig(db_path=":memory:"), + }, + stores=ServerStoresConfig( + metadata=KVStoreReference(backend="kv_default", namespace="registry"), + inference=InferenceStoreReference(backend="sql_default", table_name="inference_store"), + conversations=SqlStoreReference(backend="sql_default", table_name="conversations"), + ), + ), + ) + register_kvstore_backends({name: cfg for name, cfg in storage.backends.items() if cfg.type.value.startswith("kv_")}) + register_sqlstore_backends( + {name: cfg for name, cfg in storage.backends.items() if cfg.type.value.startswith("sql_")} + ) + defaults = dict( + image_name="test_image", + apis=[], + providers={}, + storage=storage, + ) + defaults.update(overrides) + return StackRunConfig(**defaults) + + async def test_resolve_impls_basic(): # Create a real provider spec provider_spec = InlineProviderSpec( @@ -78,7 +114,7 @@ async def test_resolve_impls_basic(): # Create provider registry with our provider provider_registry = {Api.inference: {provider_spec.provider_type: provider_spec}} - run_config = StackRunConfig( + run_config = make_run_config( image_name="test_image", providers={ "inference": [ diff --git a/tests/unit/utils/inference/test_inference_store.py b/tests/unit/utils/inference/test_inference_store.py index f6d63490a..d2de1c759 100644 --- a/tests/unit/utils/inference/test_inference_store.py +++ b/tests/unit/utils/inference/test_inference_store.py @@ -5,7 +5,6 @@ # the root directory of this source tree. import time -from tempfile import TemporaryDirectory import pytest @@ -16,8 +15,16 @@ from llama_stack.apis.inference import ( OpenAIUserMessageParam, Order, ) +from llama_stack.core.storage.datatypes import InferenceStoreReference, SqliteSqlStoreConfig from llama_stack.providers.utils.inference.inference_store import InferenceStore -from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig +from llama_stack.providers.utils.sqlstore.sqlstore import register_sqlstore_backends + + +@pytest.fixture(autouse=True) +def setup_backends(tmp_path): + """Register SQL store backends for testing.""" + db_path = str(tmp_path / "test.db") + register_sqlstore_backends({"sql_default": SqliteSqlStoreConfig(db_path=db_path)}) def create_test_chat_completion( @@ -44,167 +51,162 @@ def create_test_chat_completion( async def test_inference_store_pagination_basic(): """Test basic pagination functionality.""" - with TemporaryDirectory() as tmp_dir: - db_path = tmp_dir + "/test.db" - store = InferenceStore(SqliteSqlStoreConfig(db_path=db_path), policy=[]) - await store.initialize() + reference = InferenceStoreReference(backend="sql_default", table_name="chat_completions") + store = InferenceStore(reference, policy=[]) + await store.initialize() - # Create test data with different timestamps - base_time = int(time.time()) - test_data = [ - ("zebra-task", base_time + 1), - ("apple-job", base_time + 2), - ("moon-work", base_time + 3), - ("banana-run", base_time + 4), - ("car-exec", base_time + 5), - ] + # Create test data with different timestamps + base_time = int(time.time()) + test_data = [ + ("zebra-task", base_time + 1), + ("apple-job", base_time + 2), + ("moon-work", base_time + 3), + ("banana-run", base_time + 4), + ("car-exec", base_time + 5), + ] - # Store test chat completions - for completion_id, timestamp in test_data: - completion = create_test_chat_completion(completion_id, timestamp) - input_messages = [OpenAIUserMessageParam(role="user", content=f"Test message for {completion_id}")] - await store.store_chat_completion(completion, input_messages) + # Store test chat completions + for completion_id, timestamp in test_data: + completion = create_test_chat_completion(completion_id, timestamp) + input_messages = [OpenAIUserMessageParam(role="user", content=f"Test message for {completion_id}")] + await store.store_chat_completion(completion, input_messages) - # Wait for all queued writes to complete - await store.flush() + # Wait for all queued writes to complete + await store.flush() - # Test 1: First page with limit=2, descending order (default) - result = await store.list_chat_completions(limit=2, order=Order.desc) - assert len(result.data) == 2 - assert result.data[0].id == "car-exec" # Most recent first - assert result.data[1].id == "banana-run" - assert result.has_more is True - assert result.last_id == "banana-run" + # Test 1: First page with limit=2, descending order (default) + result = await store.list_chat_completions(limit=2, order=Order.desc) + assert len(result.data) == 2 + assert result.data[0].id == "car-exec" # Most recent first + assert result.data[1].id == "banana-run" + assert result.has_more is True + assert result.last_id == "banana-run" - # Test 2: Second page using 'after' parameter - result2 = await store.list_chat_completions(after="banana-run", limit=2, order=Order.desc) - assert len(result2.data) == 2 - assert result2.data[0].id == "moon-work" - assert result2.data[1].id == "apple-job" - assert result2.has_more is True + # Test 2: Second page using 'after' parameter + result2 = await store.list_chat_completions(after="banana-run", limit=2, order=Order.desc) + assert len(result2.data) == 2 + assert result2.data[0].id == "moon-work" + assert result2.data[1].id == "apple-job" + assert result2.has_more is True - # Test 3: Final page - result3 = await store.list_chat_completions(after="apple-job", limit=2, order=Order.desc) - assert len(result3.data) == 1 - assert result3.data[0].id == "zebra-task" - assert result3.has_more is False + # Test 3: Final page + result3 = await store.list_chat_completions(after="apple-job", limit=2, order=Order.desc) + assert len(result3.data) == 1 + assert result3.data[0].id == "zebra-task" + assert result3.has_more is False async def test_inference_store_pagination_ascending(): """Test pagination with ascending order.""" - with TemporaryDirectory() as tmp_dir: - db_path = tmp_dir + "/test.db" - store = InferenceStore(SqliteSqlStoreConfig(db_path=db_path), policy=[]) - await store.initialize() + reference = InferenceStoreReference(backend="sql_default", table_name="chat_completions") + store = InferenceStore(reference, policy=[]) + await store.initialize() - # Create test data - base_time = int(time.time()) - test_data = [ - ("delta-item", base_time + 1), - ("charlie-task", base_time + 2), - ("alpha-work", base_time + 3), - ] + # Create test data + base_time = int(time.time()) + test_data = [ + ("delta-item", base_time + 1), + ("charlie-task", base_time + 2), + ("alpha-work", base_time + 3), + ] - # Store test chat completions - for completion_id, timestamp in test_data: - completion = create_test_chat_completion(completion_id, timestamp) - input_messages = [OpenAIUserMessageParam(role="user", content=f"Test message for {completion_id}")] - await store.store_chat_completion(completion, input_messages) + # Store test chat completions + for completion_id, timestamp in test_data: + completion = create_test_chat_completion(completion_id, timestamp) + input_messages = [OpenAIUserMessageParam(role="user", content=f"Test message for {completion_id}")] + await store.store_chat_completion(completion, input_messages) - # Wait for all queued writes to complete - await store.flush() + # Wait for all queued writes to complete + await store.flush() - # Test ascending order pagination - result = await store.list_chat_completions(limit=1, order=Order.asc) - assert len(result.data) == 1 - assert result.data[0].id == "delta-item" # Oldest first - assert result.has_more is True + # Test ascending order pagination + result = await store.list_chat_completions(limit=1, order=Order.asc) + assert len(result.data) == 1 + assert result.data[0].id == "delta-item" # Oldest first + assert result.has_more is True - # Second page with ascending order - result2 = await store.list_chat_completions(after="delta-item", limit=1, order=Order.asc) - assert len(result2.data) == 1 - assert result2.data[0].id == "charlie-task" - assert result2.has_more is True + # Second page with ascending order + result2 = await store.list_chat_completions(after="delta-item", limit=1, order=Order.asc) + assert len(result2.data) == 1 + assert result2.data[0].id == "charlie-task" + assert result2.has_more is True async def test_inference_store_pagination_with_model_filter(): """Test pagination combined with model filtering.""" - with TemporaryDirectory() as tmp_dir: - db_path = tmp_dir + "/test.db" - store = InferenceStore(SqliteSqlStoreConfig(db_path=db_path), policy=[]) - await store.initialize() + reference = InferenceStoreReference(backend="sql_default", table_name="chat_completions") + store = InferenceStore(reference, policy=[]) + await store.initialize() - # Create test data with different models - base_time = int(time.time()) - test_data = [ - ("xyz-task", base_time + 1, "model-a"), - ("def-work", base_time + 2, "model-b"), - ("pqr-job", base_time + 3, "model-a"), - ("abc-run", base_time + 4, "model-b"), - ] + # Create test data with different models + base_time = int(time.time()) + test_data = [ + ("xyz-task", base_time + 1, "model-a"), + ("def-work", base_time + 2, "model-b"), + ("pqr-job", base_time + 3, "model-a"), + ("abc-run", base_time + 4, "model-b"), + ] - # Store test chat completions - for completion_id, timestamp, model in test_data: - completion = create_test_chat_completion(completion_id, timestamp, model) - input_messages = [OpenAIUserMessageParam(role="user", content=f"Test message for {completion_id}")] - await store.store_chat_completion(completion, input_messages) + # Store test chat completions + for completion_id, timestamp, model in test_data: + completion = create_test_chat_completion(completion_id, timestamp, model) + input_messages = [OpenAIUserMessageParam(role="user", content=f"Test message for {completion_id}")] + await store.store_chat_completion(completion, input_messages) - # Wait for all queued writes to complete - await store.flush() + # Wait for all queued writes to complete + await store.flush() - # Test pagination with model filter - result = await store.list_chat_completions(limit=1, model="model-a", order=Order.desc) - assert len(result.data) == 1 - assert result.data[0].id == "pqr-job" # Most recent model-a - assert result.data[0].model == "model-a" - assert result.has_more is True + # Test pagination with model filter + result = await store.list_chat_completions(limit=1, model="model-a", order=Order.desc) + assert len(result.data) == 1 + assert result.data[0].id == "pqr-job" # Most recent model-a + assert result.data[0].model == "model-a" + assert result.has_more is True - # Second page with model filter - result2 = await store.list_chat_completions(after="pqr-job", limit=1, model="model-a", order=Order.desc) - assert len(result2.data) == 1 - assert result2.data[0].id == "xyz-task" - assert result2.data[0].model == "model-a" - assert result2.has_more is False + # Second page with model filter + result2 = await store.list_chat_completions(after="pqr-job", limit=1, model="model-a", order=Order.desc) + assert len(result2.data) == 1 + assert result2.data[0].id == "xyz-task" + assert result2.data[0].model == "model-a" + assert result2.has_more is False async def test_inference_store_pagination_invalid_after(): """Test error handling for invalid 'after' parameter.""" - with TemporaryDirectory() as tmp_dir: - db_path = tmp_dir + "/test.db" - store = InferenceStore(SqliteSqlStoreConfig(db_path=db_path), policy=[]) - await store.initialize() + reference = InferenceStoreReference(backend="sql_default", table_name="chat_completions") + store = InferenceStore(reference, policy=[]) + await store.initialize() - # Try to paginate with non-existent ID - with pytest.raises(ValueError, match="Record with id='non-existent' not found in table 'chat_completions'"): - await store.list_chat_completions(after="non-existent", limit=2) + # Try to paginate with non-existent ID + with pytest.raises(ValueError, match="Record with id='non-existent' not found in table 'chat_completions'"): + await store.list_chat_completions(after="non-existent", limit=2) async def test_inference_store_pagination_no_limit(): """Test pagination behavior when no limit is specified.""" - with TemporaryDirectory() as tmp_dir: - db_path = tmp_dir + "/test.db" - store = InferenceStore(SqliteSqlStoreConfig(db_path=db_path), policy=[]) - await store.initialize() + reference = InferenceStoreReference(backend="sql_default", table_name="chat_completions") + store = InferenceStore(reference, policy=[]) + await store.initialize() - # Create test data - base_time = int(time.time()) - test_data = [ - ("omega-first", base_time + 1), - ("beta-second", base_time + 2), - ] + # Create test data + base_time = int(time.time()) + test_data = [ + ("omega-first", base_time + 1), + ("beta-second", base_time + 2), + ] - # Store test chat completions - for completion_id, timestamp in test_data: - completion = create_test_chat_completion(completion_id, timestamp) - input_messages = [OpenAIUserMessageParam(role="user", content=f"Test message for {completion_id}")] - await store.store_chat_completion(completion, input_messages) + # Store test chat completions + for completion_id, timestamp in test_data: + completion = create_test_chat_completion(completion_id, timestamp) + input_messages = [OpenAIUserMessageParam(role="user", content=f"Test message for {completion_id}")] + await store.store_chat_completion(completion, input_messages) - # Wait for all queued writes to complete - await store.flush() + # Wait for all queued writes to complete + await store.flush() - # Test without limit - result = await store.list_chat_completions(order=Order.desc) - assert len(result.data) == 2 - assert result.data[0].id == "beta-second" # Most recent first - assert result.data[1].id == "omega-first" - assert result.has_more is False + # Test without limit + result = await store.list_chat_completions(order=Order.desc) + assert len(result.data) == 2 + assert result.data[0].id == "beta-second" # Most recent first + assert result.data[1].id == "omega-first" + assert result.has_more is False diff --git a/tests/unit/utils/responses/test_responses_store.py b/tests/unit/utils/responses/test_responses_store.py index c27b5a8e5..34cff3d3f 100644 --- a/tests/unit/utils/responses/test_responses_store.py +++ b/tests/unit/utils/responses/test_responses_store.py @@ -6,6 +6,7 @@ import time from tempfile import TemporaryDirectory +from uuid import uuid4 import pytest @@ -15,8 +16,18 @@ from llama_stack.apis.agents.openai_responses import ( OpenAIResponseObject, ) from llama_stack.apis.inference import OpenAIMessageParam, OpenAIUserMessageParam +from llama_stack.core.storage.datatypes import ResponsesStoreReference, SqliteSqlStoreConfig from llama_stack.providers.utils.responses.responses_store import ResponsesStore -from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig +from llama_stack.providers.utils.sqlstore.sqlstore import register_sqlstore_backends + + +def build_store(db_path: str, policy: list | None = None) -> ResponsesStore: + backend_name = f"sql_responses_{uuid4().hex}" + register_sqlstore_backends({backend_name: SqliteSqlStoreConfig(db_path=db_path)}) + return ResponsesStore( + ResponsesStoreReference(backend=backend_name, table_name="responses"), + policy=policy or [], + ) def create_test_response_object( @@ -54,7 +65,7 @@ async def test_responses_store_pagination_basic(): """Test basic pagination functionality for responses store.""" with TemporaryDirectory() as tmp_dir: db_path = tmp_dir + "/test.db" - store = ResponsesStore(SqliteSqlStoreConfig(db_path=db_path), policy=[]) + store = build_store(db_path) await store.initialize() # Create test data with different timestamps @@ -103,7 +114,7 @@ async def test_responses_store_pagination_ascending(): """Test pagination with ascending order.""" with TemporaryDirectory() as tmp_dir: db_path = tmp_dir + "/test.db" - store = ResponsesStore(SqliteSqlStoreConfig(db_path=db_path), policy=[]) + store = build_store(db_path) await store.initialize() # Create test data @@ -141,7 +152,7 @@ async def test_responses_store_pagination_with_model_filter(): """Test pagination combined with model filtering.""" with TemporaryDirectory() as tmp_dir: db_path = tmp_dir + "/test.db" - store = ResponsesStore(SqliteSqlStoreConfig(db_path=db_path), policy=[]) + store = build_store(db_path) await store.initialize() # Create test data with different models @@ -182,7 +193,7 @@ async def test_responses_store_pagination_invalid_after(): """Test error handling for invalid 'after' parameter.""" with TemporaryDirectory() as tmp_dir: db_path = tmp_dir + "/test.db" - store = ResponsesStore(SqliteSqlStoreConfig(db_path=db_path), policy=[]) + store = build_store(db_path) await store.initialize() # Try to paginate with non-existent ID @@ -194,7 +205,7 @@ async def test_responses_store_pagination_no_limit(): """Test pagination behavior when no limit is specified.""" with TemporaryDirectory() as tmp_dir: db_path = tmp_dir + "/test.db" - store = ResponsesStore(SqliteSqlStoreConfig(db_path=db_path), policy=[]) + store = build_store(db_path) await store.initialize() # Create test data @@ -226,7 +237,7 @@ async def test_responses_store_get_response_object(): """Test retrieving a single response object.""" with TemporaryDirectory() as tmp_dir: db_path = tmp_dir + "/test.db" - store = ResponsesStore(SqliteSqlStoreConfig(db_path=db_path), policy=[]) + store = build_store(db_path) await store.initialize() # Store a test response @@ -254,7 +265,7 @@ async def test_responses_store_input_items_pagination(): """Test pagination functionality for input items.""" with TemporaryDirectory() as tmp_dir: db_path = tmp_dir + "/test.db" - store = ResponsesStore(SqliteSqlStoreConfig(db_path=db_path), policy=[]) + store = build_store(db_path) await store.initialize() # Store a test response with many inputs with explicit IDs @@ -335,7 +346,7 @@ async def test_responses_store_input_items_before_pagination(): """Test before pagination functionality for input items.""" with TemporaryDirectory() as tmp_dir: db_path = tmp_dir + "/test.db" - store = ResponsesStore(SqliteSqlStoreConfig(db_path=db_path), policy=[]) + store = build_store(db_path) await store.initialize() # Store a test response with many inputs with explicit IDs diff --git a/uv.lock b/uv.lock index f9806123d..7f6e0401b 100644 --- a/uv.lock +++ b/uv.lock @@ -4129,27 +4129,28 @@ wheels = [ [[package]] name = "ruff" -version = "0.9.10" +version = "0.14.1" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/20/8e/fafaa6f15c332e73425d9c44ada85360501045d5ab0b81400076aff27cf6/ruff-0.9.10.tar.gz", hash = "sha256:9bacb735d7bada9cfb0f2c227d3658fc443d90a727b47f206fb33f52f3c0eac7", size = 3759776, upload-time = "2025-03-07T15:27:44.363Z" } +sdist = { url = "https://files.pythonhosted.org/packages/9e/58/6ca66896635352812de66f71cdf9ff86b3a4f79071ca5730088c0cd0fc8d/ruff-0.14.1.tar.gz", hash = "sha256:1dd86253060c4772867c61791588627320abcb6ed1577a90ef432ee319729b69", size = 5513429, upload-time = "2025-10-16T18:05:41.766Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/73/b2/af7c2cc9e438cbc19fafeec4f20bfcd72165460fe75b2b6e9a0958c8c62b/ruff-0.9.10-py3-none-linux_armv6l.whl", hash = "sha256:eb4d25532cfd9fe461acc83498361ec2e2252795b4f40b17e80692814329e42d", size = 10049494, upload-time = "2025-03-07T15:26:51.268Z" }, - { url = "https://files.pythonhosted.org/packages/6d/12/03f6dfa1b95ddd47e6969f0225d60d9d7437c91938a310835feb27927ca0/ruff-0.9.10-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:188a6638dab1aa9bb6228a7302387b2c9954e455fb25d6b4470cb0641d16759d", size = 10853584, upload-time = "2025-03-07T15:26:56.104Z" }, - { url = "https://files.pythonhosted.org/packages/02/49/1c79e0906b6ff551fb0894168763f705bf980864739572b2815ecd3c9df0/ruff-0.9.10-py3-none-macosx_11_0_arm64.whl", hash = "sha256:5284dcac6b9dbc2fcb71fdfc26a217b2ca4ede6ccd57476f52a587451ebe450d", size = 10155692, upload-time = "2025-03-07T15:27:01.385Z" }, - { url = "https://files.pythonhosted.org/packages/5b/01/85e8082e41585e0e1ceb11e41c054e9e36fed45f4b210991052d8a75089f/ruff-0.9.10-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:47678f39fa2a3da62724851107f438c8229a3470f533894b5568a39b40029c0c", size = 10369760, upload-time = "2025-03-07T15:27:04.023Z" }, - { url = "https://files.pythonhosted.org/packages/a1/90/0bc60bd4e5db051f12445046d0c85cc2c617095c0904f1aa81067dc64aea/ruff-0.9.10-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:99713a6e2766b7a17147b309e8c915b32b07a25c9efd12ada79f217c9c778b3e", size = 9912196, upload-time = "2025-03-07T15:27:06.93Z" }, - { url = "https://files.pythonhosted.org/packages/66/ea/0b7e8c42b1ec608033c4d5a02939c82097ddcb0b3e393e4238584b7054ab/ruff-0.9.10-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:524ee184d92f7c7304aa568e2db20f50c32d1d0caa235d8ddf10497566ea1a12", size = 11434985, upload-time = "2025-03-07T15:27:10.082Z" }, - { url = "https://files.pythonhosted.org/packages/d5/86/3171d1eff893db4f91755175a6e1163c5887be1f1e2f4f6c0c59527c2bfd/ruff-0.9.10-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:df92aeac30af821f9acf819fc01b4afc3dfb829d2782884f8739fb52a8119a16", size = 12155842, upload-time = "2025-03-07T15:27:12.727Z" }, - { url = "https://files.pythonhosted.org/packages/89/9e/700ca289f172a38eb0bca752056d0a42637fa17b81649b9331786cb791d7/ruff-0.9.10-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:de42e4edc296f520bb84954eb992a07a0ec5a02fecb834498415908469854a52", size = 11613804, upload-time = "2025-03-07T15:27:15.944Z" }, - { url = "https://files.pythonhosted.org/packages/f2/92/648020b3b5db180f41a931a68b1c8575cca3e63cec86fd26807422a0dbad/ruff-0.9.10-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d257f95b65806104b6b1ffca0ea53f4ef98454036df65b1eda3693534813ecd1", size = 13823776, upload-time = "2025-03-07T15:27:18.996Z" }, - { url = "https://files.pythonhosted.org/packages/5e/a6/cc472161cd04d30a09d5c90698696b70c169eeba2c41030344194242db45/ruff-0.9.10-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b60dec7201c0b10d6d11be00e8f2dbb6f40ef1828ee75ed739923799513db24c", size = 11302673, upload-time = "2025-03-07T15:27:21.655Z" }, - { url = "https://files.pythonhosted.org/packages/6c/db/d31c361c4025b1b9102b4d032c70a69adb9ee6fde093f6c3bf29f831c85c/ruff-0.9.10-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:d838b60007da7a39c046fcdd317293d10b845001f38bcb55ba766c3875b01e43", size = 10235358, upload-time = "2025-03-07T15:27:24.72Z" }, - { url = "https://files.pythonhosted.org/packages/d1/86/d6374e24a14d4d93ebe120f45edd82ad7dcf3ef999ffc92b197d81cdc2a5/ruff-0.9.10-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:ccaf903108b899beb8e09a63ffae5869057ab649c1e9231c05ae354ebc62066c", size = 9886177, upload-time = "2025-03-07T15:27:27.282Z" }, - { url = "https://files.pythonhosted.org/packages/00/62/a61691f6eaaac1e945a1f3f59f1eea9a218513139d5b6c2b8f88b43b5b8f/ruff-0.9.10-py3-none-musllinux_1_2_i686.whl", hash = "sha256:f9567d135265d46e59d62dc60c0bfad10e9a6822e231f5b24032dba5a55be6b5", size = 10864747, upload-time = "2025-03-07T15:27:30.637Z" }, - { url = "https://files.pythonhosted.org/packages/ee/94/2c7065e1d92a8a8a46d46d9c3cf07b0aa7e0a1e0153d74baa5e6620b4102/ruff-0.9.10-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:5f202f0d93738c28a89f8ed9eaba01b7be339e5d8d642c994347eaa81c6d75b8", size = 11360441, upload-time = "2025-03-07T15:27:33.356Z" }, - { url = "https://files.pythonhosted.org/packages/a7/8f/1f545ea6f9fcd7bf4368551fb91d2064d8f0577b3079bb3f0ae5779fb773/ruff-0.9.10-py3-none-win32.whl", hash = "sha256:bfb834e87c916521ce46b1788fbb8484966e5113c02df216680102e9eb960029", size = 10247401, upload-time = "2025-03-07T15:27:35.994Z" }, - { url = "https://files.pythonhosted.org/packages/4f/18/fb703603ab108e5c165f52f5b86ee2aa9be43bb781703ec87c66a5f5d604/ruff-0.9.10-py3-none-win_amd64.whl", hash = "sha256:f2160eeef3031bf4b17df74e307d4c5fb689a6f3a26a2de3f7ef4044e3c484f1", size = 11366360, upload-time = "2025-03-07T15:27:38.66Z" }, - { url = "https://files.pythonhosted.org/packages/35/85/338e603dc68e7d9994d5d84f24adbf69bae760ba5efd3e20f5ff2cec18da/ruff-0.9.10-py3-none-win_arm64.whl", hash = "sha256:5fd804c0327a5e5ea26615550e706942f348b197d5475ff34c19733aee4b2e69", size = 10436892, upload-time = "2025-03-07T15:27:41.687Z" }, + { url = "https://files.pythonhosted.org/packages/8d/39/9cc5ab181478d7a18adc1c1e051a84ee02bec94eb9bdfd35643d7c74ca31/ruff-0.14.1-py3-none-linux_armv6l.whl", hash = "sha256:083bfc1f30f4a391ae09c6f4f99d83074416b471775b59288956f5bc18e82f8b", size = 12445415, upload-time = "2025-10-16T18:04:48.227Z" }, + { url = "https://files.pythonhosted.org/packages/ef/2e/1226961855ccd697255988f5a2474890ac7c5863b080b15bd038df820818/ruff-0.14.1-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:f6fa757cd717f791009f7669fefb09121cc5f7d9bd0ef211371fad68c2b8b224", size = 12784267, upload-time = "2025-10-16T18:04:52.515Z" }, + { url = "https://files.pythonhosted.org/packages/c1/ea/fd9e95863124ed159cd0667ec98449ae461de94acda7101f1acb6066da00/ruff-0.14.1-py3-none-macosx_11_0_arm64.whl", hash = "sha256:d6191903d39ac156921398e9c86b7354d15e3c93772e7dbf26c9fcae59ceccd5", size = 11781872, upload-time = "2025-10-16T18:04:55.396Z" }, + { url = "https://files.pythonhosted.org/packages/1e/5a/e890f7338ff537dba4589a5e02c51baa63020acfb7c8cbbaea4831562c96/ruff-0.14.1-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ed04f0e04f7a4587244e5c9d7df50e6b5bf2705d75059f409a6421c593a35896", size = 12226558, upload-time = "2025-10-16T18:04:58.166Z" }, + { url = "https://files.pythonhosted.org/packages/a6/7a/8ab5c3377f5bf31e167b73651841217542bcc7aa1c19e83030835cc25204/ruff-0.14.1-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:5c9e6cf6cd4acae0febbce29497accd3632fe2025c0c583c8b87e8dbdeae5f61", size = 12187898, upload-time = "2025-10-16T18:05:01.455Z" }, + { url = "https://files.pythonhosted.org/packages/48/8d/ba7c33aa55406955fc124e62c8259791c3d42e3075a71710fdff9375134f/ruff-0.14.1-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a6fa2458527794ecdfbe45f654e42c61f2503a230545a91af839653a0a93dbc6", size = 12939168, upload-time = "2025-10-16T18:05:04.397Z" }, + { url = "https://files.pythonhosted.org/packages/b4/c2/70783f612b50f66d083380e68cbd1696739d88e9b4f6164230375532c637/ruff-0.14.1-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:39f1c392244e338b21d42ab29b8a6392a722c5090032eb49bb4d6defcdb34345", size = 14386942, upload-time = "2025-10-16T18:05:07.102Z" }, + { url = "https://files.pythonhosted.org/packages/48/44/cd7abb9c776b66d332119d67f96acf15830d120f5b884598a36d9d3f4d83/ruff-0.14.1-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7382fa12a26cce1f95070ce450946bec357727aaa428983036362579eadcc5cf", size = 13990622, upload-time = "2025-10-16T18:05:09.882Z" }, + { url = "https://files.pythonhosted.org/packages/eb/56/4259b696db12ac152fe472764b4f78bbdd9b477afd9bc3a6d53c01300b37/ruff-0.14.1-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:dd0bf2be3ae8521e1093a487c4aa3b455882f139787770698530d28ed3fbb37c", size = 13431143, upload-time = "2025-10-16T18:05:13.46Z" }, + { url = "https://files.pythonhosted.org/packages/e0/35/266a80d0eb97bd224b3265b9437bd89dde0dcf4faf299db1212e81824e7e/ruff-0.14.1-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cabcaa9ccf8089fb4fdb78d17cc0e28241520f50f4c2e88cb6261ed083d85151", size = 13132844, upload-time = "2025-10-16T18:05:16.1Z" }, + { url = "https://files.pythonhosted.org/packages/65/6e/d31ce218acc11a8d91ef208e002a31acf315061a85132f94f3df7a252b18/ruff-0.14.1-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:747d583400f6125ec11a4c14d1c8474bf75d8b419ad22a111a537ec1a952d192", size = 13401241, upload-time = "2025-10-16T18:05:19.395Z" }, + { url = "https://files.pythonhosted.org/packages/9f/b5/dbc4221bf0b03774b3b2f0d47f39e848d30664157c15b965a14d890637d2/ruff-0.14.1-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:5a6e74c0efd78515a1d13acbfe6c90f0f5bd822aa56b4a6d43a9ffb2ae6e56cd", size = 12132476, upload-time = "2025-10-16T18:05:22.163Z" }, + { url = "https://files.pythonhosted.org/packages/98/4b/ac99194e790ccd092d6a8b5f341f34b6e597d698e3077c032c502d75ea84/ruff-0.14.1-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:0ea6a864d2fb41a4b6d5b456ed164302a0d96f4daac630aeba829abfb059d020", size = 12139749, upload-time = "2025-10-16T18:05:25.162Z" }, + { url = "https://files.pythonhosted.org/packages/47/26/7df917462c3bb5004e6fdfcc505a49e90bcd8a34c54a051953118c00b53a/ruff-0.14.1-py3-none-musllinux_1_2_i686.whl", hash = "sha256:0826b8764f94229604fa255918d1cc45e583e38c21c203248b0bfc9a0e930be5", size = 12544758, upload-time = "2025-10-16T18:05:28.018Z" }, + { url = "https://files.pythonhosted.org/packages/64/d0/81e7f0648e9764ad9b51dd4be5e5dac3fcfff9602428ccbae288a39c2c22/ruff-0.14.1-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:cbc52160465913a1a3f424c81c62ac8096b6a491468e7d872cb9444a860bc33d", size = 13221811, upload-time = "2025-10-16T18:05:30.707Z" }, + { url = "https://files.pythonhosted.org/packages/c3/07/3c45562c67933cc35f6d5df4ca77dabbcd88fddaca0d6b8371693d29fd56/ruff-0.14.1-py3-none-win32.whl", hash = "sha256:e037ea374aaaff4103240ae79168c0945ae3d5ae8db190603de3b4012bd1def6", size = 12319467, upload-time = "2025-10-16T18:05:33.261Z" }, + { url = "https://files.pythonhosted.org/packages/02/88/0ee4ca507d4aa05f67e292d2e5eb0b3e358fbcfe527554a2eda9ac422d6b/ruff-0.14.1-py3-none-win_amd64.whl", hash = "sha256:59d599cdff9c7f925a017f6f2c256c908b094e55967f93f2821b1439928746a1", size = 13401123, upload-time = "2025-10-16T18:05:35.984Z" }, + { url = "https://files.pythonhosted.org/packages/b8/81/4b6387be7014858d924b843530e1b2a8e531846807516e9bea2ee0936bf7/ruff-0.14.1-py3-none-win_arm64.whl", hash = "sha256:e3b443c4c9f16ae850906b8d0a707b2a4c16f8d2f0a7fe65c475c5886665ce44", size = 12436636, upload-time = "2025-10-16T18:05:38.995Z" }, ] [[package]] @@ -4525,31 +4526,31 @@ wheels = [ [[package]] name = "sqlalchemy" -version = "2.0.41" +version = "2.0.44" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "greenlet", marker = "(python_full_version < '3.14' and platform_machine == 'AMD64') or (python_full_version < '3.14' and platform_machine == 'WIN32') or (python_full_version < '3.14' and platform_machine == 'aarch64') or (python_full_version < '3.14' and platform_machine == 'amd64') or (python_full_version < '3.14' and platform_machine == 'ppc64le') or (python_full_version < '3.14' and platform_machine == 'win32') or (python_full_version < '3.14' and platform_machine == 'x86_64')" }, + { name = "greenlet", marker = "platform_machine == 'AMD64' or platform_machine == 'WIN32' or platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'ppc64le' or platform_machine == 'win32' or platform_machine == 'x86_64'" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/63/66/45b165c595ec89aa7dcc2c1cd222ab269bc753f1fc7a1e68f8481bd957bf/sqlalchemy-2.0.41.tar.gz", hash = "sha256:edba70118c4be3c2b1f90754d308d0b79c6fe2c0fdc52d8ddf603916f83f4db9", size = 9689424, upload-time = "2025-05-14T17:10:32.339Z" } +sdist = { url = "https://files.pythonhosted.org/packages/f0/f2/840d7b9496825333f532d2e3976b8eadbf52034178aac53630d09fe6e1ef/sqlalchemy-2.0.44.tar.gz", hash = "sha256:0ae7454e1ab1d780aee69fd2aae7d6b8670a581d8847f2d1e0f7ddfbf47e5a22", size = 9819830, upload-time = "2025-10-10T14:39:12.935Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/3e/2a/f1f4e068b371154740dd10fb81afb5240d5af4aa0087b88d8b308b5429c2/sqlalchemy-2.0.41-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:81f413674d85cfd0dfcd6512e10e0f33c19c21860342a4890c3a2b59479929f9", size = 2119645, upload-time = "2025-05-14T17:55:24.854Z" }, - { url = "https://files.pythonhosted.org/packages/9b/e8/c664a7e73d36fbfc4730f8cf2bf930444ea87270f2825efbe17bf808b998/sqlalchemy-2.0.41-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:598d9ebc1e796431bbd068e41e4de4dc34312b7aa3292571bb3674a0cb415dd1", size = 2107399, upload-time = "2025-05-14T17:55:28.097Z" }, - { url = "https://files.pythonhosted.org/packages/5c/78/8a9cf6c5e7135540cb682128d091d6afa1b9e48bd049b0d691bf54114f70/sqlalchemy-2.0.41-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a104c5694dfd2d864a6f91b0956eb5d5883234119cb40010115fd45a16da5e70", size = 3293269, upload-time = "2025-05-14T17:50:38.227Z" }, - { url = "https://files.pythonhosted.org/packages/3c/35/f74add3978c20de6323fb11cb5162702670cc7a9420033befb43d8d5b7a4/sqlalchemy-2.0.41-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6145afea51ff0af7f2564a05fa95eb46f542919e6523729663a5d285ecb3cf5e", size = 3303364, upload-time = "2025-05-14T17:51:49.829Z" }, - { url = "https://files.pythonhosted.org/packages/6a/d4/c990f37f52c3f7748ebe98883e2a0f7d038108c2c5a82468d1ff3eec50b7/sqlalchemy-2.0.41-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:b46fa6eae1cd1c20e6e6f44e19984d438b6b2d8616d21d783d150df714f44078", size = 3229072, upload-time = "2025-05-14T17:50:39.774Z" }, - { url = "https://files.pythonhosted.org/packages/15/69/cab11fecc7eb64bc561011be2bd03d065b762d87add52a4ca0aca2e12904/sqlalchemy-2.0.41-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:41836fe661cc98abfae476e14ba1906220f92c4e528771a8a3ae6a151242d2ae", size = 3268074, upload-time = "2025-05-14T17:51:51.736Z" }, - { url = "https://files.pythonhosted.org/packages/5c/ca/0c19ec16858585d37767b167fc9602593f98998a68a798450558239fb04a/sqlalchemy-2.0.41-cp312-cp312-win32.whl", hash = "sha256:a8808d5cf866c781150d36a3c8eb3adccfa41a8105d031bf27e92c251e3969d6", size = 2084514, upload-time = "2025-05-14T17:55:49.915Z" }, - { url = "https://files.pythonhosted.org/packages/7f/23/4c2833d78ff3010a4e17f984c734f52b531a8c9060a50429c9d4b0211be6/sqlalchemy-2.0.41-cp312-cp312-win_amd64.whl", hash = "sha256:5b14e97886199c1f52c14629c11d90c11fbb09e9334fa7bb5f6d068d9ced0ce0", size = 2111557, upload-time = "2025-05-14T17:55:51.349Z" }, - { url = "https://files.pythonhosted.org/packages/d3/ad/2e1c6d4f235a97eeef52d0200d8ddda16f6c4dd70ae5ad88c46963440480/sqlalchemy-2.0.41-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:4eeb195cdedaf17aab6b247894ff2734dcead6c08f748e617bfe05bd5a218443", size = 2115491, upload-time = "2025-05-14T17:55:31.177Z" }, - { url = "https://files.pythonhosted.org/packages/cf/8d/be490e5db8400dacc89056f78a52d44b04fbf75e8439569d5b879623a53b/sqlalchemy-2.0.41-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:d4ae769b9c1c7757e4ccce94b0641bc203bbdf43ba7a2413ab2523d8d047d8dc", size = 2102827, upload-time = "2025-05-14T17:55:34.921Z" }, - { url = "https://files.pythonhosted.org/packages/a0/72/c97ad430f0b0e78efaf2791342e13ffeafcbb3c06242f01a3bb8fe44f65d/sqlalchemy-2.0.41-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a62448526dd9ed3e3beedc93df9bb6b55a436ed1474db31a2af13b313a70a7e1", size = 3225224, upload-time = "2025-05-14T17:50:41.418Z" }, - { url = "https://files.pythonhosted.org/packages/5e/51/5ba9ea3246ea068630acf35a6ba0d181e99f1af1afd17e159eac7e8bc2b8/sqlalchemy-2.0.41-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dc56c9788617b8964ad02e8fcfeed4001c1f8ba91a9e1f31483c0dffb207002a", size = 3230045, upload-time = "2025-05-14T17:51:54.722Z" }, - { url = "https://files.pythonhosted.org/packages/78/2f/8c14443b2acea700c62f9b4a8bad9e49fc1b65cfb260edead71fd38e9f19/sqlalchemy-2.0.41-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:c153265408d18de4cc5ded1941dcd8315894572cddd3c58df5d5b5705b3fa28d", size = 3159357, upload-time = "2025-05-14T17:50:43.483Z" }, - { url = "https://files.pythonhosted.org/packages/fc/b2/43eacbf6ccc5276d76cea18cb7c3d73e294d6fb21f9ff8b4eef9b42bbfd5/sqlalchemy-2.0.41-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:4f67766965996e63bb46cfbf2ce5355fc32d9dd3b8ad7e536a920ff9ee422e23", size = 3197511, upload-time = "2025-05-14T17:51:57.308Z" }, - { url = "https://files.pythonhosted.org/packages/fa/2e/677c17c5d6a004c3c45334ab1dbe7b7deb834430b282b8a0f75ae220c8eb/sqlalchemy-2.0.41-cp313-cp313-win32.whl", hash = "sha256:bfc9064f6658a3d1cadeaa0ba07570b83ce6801a1314985bf98ec9b95d74e15f", size = 2082420, upload-time = "2025-05-14T17:55:52.69Z" }, - { url = "https://files.pythonhosted.org/packages/e9/61/e8c1b9b6307c57157d328dd8b8348ddc4c47ffdf1279365a13b2b98b8049/sqlalchemy-2.0.41-cp313-cp313-win_amd64.whl", hash = "sha256:82ca366a844eb551daff9d2e6e7a9e5e76d2612c8564f58db6c19a726869c1df", size = 2108329, upload-time = "2025-05-14T17:55:54.495Z" }, - { url = "https://files.pythonhosted.org/packages/1c/fc/9ba22f01b5cdacc8f5ed0d22304718d2c758fce3fd49a5372b886a86f37c/sqlalchemy-2.0.41-py3-none-any.whl", hash = "sha256:57df5dc6fdb5ed1a88a1ed2195fd31927e705cad62dedd86b46972752a80f576", size = 1911224, upload-time = "2025-05-14T17:39:42.154Z" }, + { url = "https://files.pythonhosted.org/packages/62/c4/59c7c9b068e6813c898b771204aad36683c96318ed12d4233e1b18762164/sqlalchemy-2.0.44-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:72fea91746b5890f9e5e0997f16cbf3d53550580d76355ba2d998311b17b2250", size = 2139675, upload-time = "2025-10-10T16:03:31.064Z" }, + { url = "https://files.pythonhosted.org/packages/d6/ae/eeb0920537a6f9c5a3708e4a5fc55af25900216bdb4847ec29cfddf3bf3a/sqlalchemy-2.0.44-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:585c0c852a891450edbb1eaca8648408a3cc125f18cf433941fa6babcc359e29", size = 2127726, upload-time = "2025-10-10T16:03:35.934Z" }, + { url = "https://files.pythonhosted.org/packages/d8/d5/2ebbabe0379418eda8041c06b0b551f213576bfe4c2f09d77c06c07c8cc5/sqlalchemy-2.0.44-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9b94843a102efa9ac68a7a30cd46df3ff1ed9c658100d30a725d10d9c60a2f44", size = 3327603, upload-time = "2025-10-10T15:35:28.322Z" }, + { url = "https://files.pythonhosted.org/packages/45/e5/5aa65852dadc24b7d8ae75b7efb8d19303ed6ac93482e60c44a585930ea5/sqlalchemy-2.0.44-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:119dc41e7a7defcefc57189cfa0e61b1bf9c228211aba432b53fb71ef367fda1", size = 3337842, upload-time = "2025-10-10T15:43:45.431Z" }, + { url = "https://files.pythonhosted.org/packages/41/92/648f1afd3f20b71e880ca797a960f638d39d243e233a7082c93093c22378/sqlalchemy-2.0.44-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:0765e318ee9179b3718c4fd7ba35c434f4dd20332fbc6857a5e8df17719c24d7", size = 3264558, upload-time = "2025-10-10T15:35:29.93Z" }, + { url = "https://files.pythonhosted.org/packages/40/cf/e27d7ee61a10f74b17740918e23cbc5bc62011b48282170dc4c66da8ec0f/sqlalchemy-2.0.44-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:2e7b5b079055e02d06a4308d0481658e4f06bc7ef211567edc8f7d5dce52018d", size = 3301570, upload-time = "2025-10-10T15:43:48.407Z" }, + { url = "https://files.pythonhosted.org/packages/3b/3d/3116a9a7b63e780fb402799b6da227435be878b6846b192f076d2f838654/sqlalchemy-2.0.44-cp312-cp312-win32.whl", hash = "sha256:846541e58b9a81cce7dee8329f352c318de25aa2f2bbe1e31587eb1f057448b4", size = 2103447, upload-time = "2025-10-10T15:03:21.678Z" }, + { url = "https://files.pythonhosted.org/packages/25/83/24690e9dfc241e6ab062df82cc0df7f4231c79ba98b273fa496fb3dd78ed/sqlalchemy-2.0.44-cp312-cp312-win_amd64.whl", hash = "sha256:7cbcb47fd66ab294703e1644f78971f6f2f1126424d2b300678f419aa73c7b6e", size = 2130912, upload-time = "2025-10-10T15:03:24.656Z" }, + { url = "https://files.pythonhosted.org/packages/45/d3/c67077a2249fdb455246e6853166360054c331db4613cda3e31ab1cadbef/sqlalchemy-2.0.44-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:ff486e183d151e51b1d694c7aa1695747599bb00b9f5f604092b54b74c64a8e1", size = 2135479, upload-time = "2025-10-10T16:03:37.671Z" }, + { url = "https://files.pythonhosted.org/packages/2b/91/eabd0688330d6fd114f5f12c4f89b0d02929f525e6bf7ff80aa17ca802af/sqlalchemy-2.0.44-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:0b1af8392eb27b372ddb783b317dea0f650241cea5bd29199b22235299ca2e45", size = 2123212, upload-time = "2025-10-10T16:03:41.755Z" }, + { url = "https://files.pythonhosted.org/packages/b0/bb/43e246cfe0e81c018076a16036d9b548c4cc649de241fa27d8d9ca6f85ab/sqlalchemy-2.0.44-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2b61188657e3a2b9ac4e8f04d6cf8e51046e28175f79464c67f2fd35bceb0976", size = 3255353, upload-time = "2025-10-10T15:35:31.221Z" }, + { url = "https://files.pythonhosted.org/packages/b9/96/c6105ed9a880abe346b64d3b6ddef269ddfcab04f7f3d90a0bf3c5a88e82/sqlalchemy-2.0.44-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b87e7b91a5d5973dda5f00cd61ef72ad75a1db73a386b62877d4875a8840959c", size = 3260222, upload-time = "2025-10-10T15:43:50.124Z" }, + { url = "https://files.pythonhosted.org/packages/44/16/1857e35a47155b5ad927272fee81ae49d398959cb749edca6eaa399b582f/sqlalchemy-2.0.44-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:15f3326f7f0b2bfe406ee562e17f43f36e16167af99c4c0df61db668de20002d", size = 3189614, upload-time = "2025-10-10T15:35:32.578Z" }, + { url = "https://files.pythonhosted.org/packages/88/ee/4afb39a8ee4fc786e2d716c20ab87b5b1fb33d4ac4129a1aaa574ae8a585/sqlalchemy-2.0.44-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:1e77faf6ff919aa8cd63f1c4e561cac1d9a454a191bb864d5dd5e545935e5a40", size = 3226248, upload-time = "2025-10-10T15:43:51.862Z" }, + { url = "https://files.pythonhosted.org/packages/32/d5/0e66097fc64fa266f29a7963296b40a80d6a997b7ac13806183700676f86/sqlalchemy-2.0.44-cp313-cp313-win32.whl", hash = "sha256:ee51625c2d51f8baadf2829fae817ad0b66b140573939dd69284d2ba3553ae73", size = 2101275, upload-time = "2025-10-10T15:03:26.096Z" }, + { url = "https://files.pythonhosted.org/packages/03/51/665617fe4f8c6450f42a6d8d69243f9420f5677395572c2fe9d21b493b7b/sqlalchemy-2.0.44-cp313-cp313-win_amd64.whl", hash = "sha256:c1c80faaee1a6c3428cecf40d16a2365bcf56c424c92c2b6f0f9ad204b899e9e", size = 2127901, upload-time = "2025-10-10T15:03:27.548Z" }, + { url = "https://files.pythonhosted.org/packages/9c/5e/6a29fa884d9fb7ddadf6b69490a9d45fded3b38541713010dad16b77d015/sqlalchemy-2.0.44-py3-none-any.whl", hash = "sha256:19de7ca1246fbef9f9d1bff8f1ab25641569df226364a0e40457dc5457c54b05", size = 1928718, upload-time = "2025-10-10T15:29:45.32Z" }, ] [package.optional-dependencies]