diff --git a/.github/actions/run-and-record-tests/action.yml b/.github/actions/run-and-record-tests/action.yml
index a5aa31af4..ac600d570 100644
--- a/.github/actions/run-and-record-tests/action.yml
+++ b/.github/actions/run-and-record-tests/action.yml
@@ -82,11 +82,13 @@ runs:
         echo "No recording changes"
       fi

-  - name: Write inference logs to file
+  - name: Write docker logs to file
     if: ${{ always() }}
     shell: bash
     run: |
-      sudo docker logs ollama > ollama-${{ inputs.inference-mode }}.log || true
+      # Ollama logs (if ollama container exists)
+      sudo docker logs ollama > ollama-${{ inputs.inference-mode }}.log 2>&1 || true
+      # Note: distro container logs are now dumped in integration-tests.sh before container is removed

   - name: Upload logs
     if: ${{ always() }}
diff --git a/.github/workflows/integration-auth-tests.yml b/.github/workflows/integration-auth-tests.yml
index ea3ff2b64..30a8063ea 100644
--- a/.github/workflows/integration-auth-tests.yml
+++ b/.github/workflows/integration-auth-tests.yml
@@ -73,6 +73,24 @@ jobs:
         image_name: kube
         apis: []
         providers: {}
+        storage:
+          backends:
+            kv_default:
+              type: kv_sqlite
+              db_path: $run_dir/kvstore.db
+            sql_default:
+              type: sql_sqlite
+              db_path: $run_dir/sql_store.db
+          stores:
+            metadata:
+              namespace: registry
+              backend: kv_default
+            inference:
+              table_name: inference_store
+              backend: sql_default
+            conversations:
+              table_name: openai_conversations
+              backend: sql_default
         server:
           port: 8321
         EOF
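The hunk above is part of a broader migration from top-level `metadata_store` / `inference_store` blocks to a unified `storage` section (the Postgres configs further down show the same change on both sides of the diff). A rough before/after sketch of the mapping, assuming the legacy SQLite shape mirrored the old Postgres blocks:

```yaml
# Legacy layout (assumed SQLite equivalent of the removed Postgres blocks below):
metadata_store:
  type: sqlite
  db_path: $run_dir/kvstore.db
inference_store:
  type: sqlite
  db_path: $run_dir/sql_store.db

# New layout: named backends, plus logical stores that reference them by name.
storage:
  backends:
    kv_default:
      type: kv_sqlite
      db_path: $run_dir/kvstore.db
    sql_default:
      type: sql_sqlite
      db_path: $run_dir/sql_store.db
  stores:
    metadata:
      namespace: registry
      backend: kv_default
    inference:
      table_name: inference_store
      backend: sql_default
```

Note that this workflow config uses a `stores:` key while the k8s-benchmark configs below use `references:`; both map named stores onto a backend.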
diff --git a/.github/workflows/integration-vector-io-tests.yml b/.github/workflows/integration-vector-io-tests.yml
index e9a758873..a6a86b15f 100644
--- a/.github/workflows/integration-vector-io-tests.yml
+++ b/.github/workflows/integration-vector-io-tests.yml
@@ -169,9 +169,7 @@ jobs:
       run: |
         uv run --no-sync \
           pytest -sv --stack-config="files=inline::localfs,inference=inline::sentence-transformers,vector_io=${{ matrix.vector-io-provider }}" \
-          tests/integration/vector_io \
-          --embedding-model inline::sentence-transformers/nomic-ai/nomic-embed-text-v1.5 \
-          --embedding-dimension 768
+          tests/integration/vector_io

   - name: Check Storage and Memory Available After Tests
     if: ${{ always() }}
diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml
index b5845be53..0fdd50acc 100644
--- a/.github/workflows/pre-commit.yml
+++ b/.github/workflows/pre-commit.yml
@@ -37,7 +37,7 @@ jobs:
           .pre-commit-config.yaml

     - name: Set up Node.js
-      uses: actions/setup-node@a0853c24544627f65ddf259abe73b1d18a591444 # v5.0.0
+      uses: actions/setup-node@2028fbc5c25fe9cf00d9f06a71cc4710d4507903 # v6.0.0
       with:
         node-version: '20'
         cache: 'npm'
diff --git a/.github/workflows/precommit-trigger.yml b/.github/workflows/precommit-trigger.yml
index 0c23b57de..b05898d29 100644
--- a/.github/workflows/precommit-trigger.yml
+++ b/.github/workflows/precommit-trigger.yml
@@ -99,7 +99,7 @@ jobs:
             owner: context.repo.owner,
             repo: context.repo.repo,
             issue_number: ${{ steps.check_author.outputs.pr_number }},
-            body: `⏳ Running pre-commit hooks on PR #${{ steps.check_author.outputs.pr_number }}...`
+            body: `⏳ Running [pre-commit hooks](https://github.com/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}) on PR #${{ steps.check_author.outputs.pr_number }}...`
           });

     - name: Checkout PR branch (same-repo)
@@ -141,7 +141,7 @@ jobs:

     - name: Set up Node.js
       if: steps.check_author.outputs.authorized == 'true'
-      uses: actions/setup-node@a0853c24544627f65ddf259abe73b1d18a591444 # v5.0.0
+      uses: actions/setup-node@2028fbc5c25fe9cf00d9f06a71cc4710d4507903 # v6.0.0
       with:
         node-version: '20'
         cache: 'npm'
diff --git a/.github/workflows/providers-list-deps.yml b/.github/workflows/providers-list-deps.yml
index df491b680..e30e1e5fb 100644
--- a/.github/workflows/providers-list-deps.yml
+++ b/.github/workflows/providers-list-deps.yml
@@ -36,7 +36,7 @@ jobs:
       distros: ${{ steps.set-matrix.outputs.distros }}
     steps:
       - name: Checkout repository
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0

       - name: Generate Distribution List
         id: set-matrix
@@ -55,7 +55,7 @@ jobs:

     steps:
       - name: Checkout repository
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0

       - name: Install dependencies
         uses: ./.github/actions/setup-runner
@@ -79,7 +79,7 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - name: Checkout repository
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0

       - name: Install dependencies
         uses: ./.github/actions/setup-runner
@@ -92,7 +92,7 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - name: Checkout repository
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0

       - name: Install dependencies
         uses: ./.github/actions/setup-runner
diff --git a/.github/workflows/python-build-test.yml b/.github/workflows/python-build-test.yml
index dfa844175..96243285f 100644
--- a/.github/workflows/python-build-test.yml
+++ b/.github/workflows/python-build-test.yml
@@ -24,7 +24,7 @@ jobs:
         uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0

       - name: Install uv
-        uses: astral-sh/setup-uv@eb1897b8dc4b5d5bfe39a428a8f2304605e0983c # v7.0.0
+        uses: astral-sh/setup-uv@3259c6206f993105e3a61b142c2d97bf4b9ef83d # v7.1.0
         with:
           python-version: ${{ matrix.python-version }}
           activate-environment: true
diff --git a/.github/workflows/ui-unit-tests.yml b/.github/workflows/ui-unit-tests.yml
index c16f512d1..e8f318b8e 100644
--- a/.github/workflows/ui-unit-tests.yml
+++ b/.github/workflows/ui-unit-tests.yml
@@ -29,7 +29,7 @@ jobs:
         uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0

     - name: Setup Node.js
-      uses: actions/setup-node@a0853c24544627f65ddf259abe73b1d18a591444 # v5.0.0
+      uses: actions/setup-node@2028fbc5c25fe9cf00d9f06a71cc4710d4507903 # v6.0.0
       with:
         node-version: ${{ matrix.node-version }}
         cache: 'npm'
diff --git a/benchmarking/k8s-benchmark/stack-configmap.yaml b/benchmarking/k8s-benchmark/stack-configmap.yaml
index bb8a48d65..e1ca170f5 100644
--- a/benchmarking/k8s-benchmark/stack-configmap.yaml
+++ b/benchmarking/k8s-benchmark/stack-configmap.yaml
@@ -98,21 +98,30 @@ data:
       - provider_id: model-context-protocol
         provider_type: remote::model-context-protocol
         config: {}
-    metadata_store:
-      type: postgres
-      host: ${env.POSTGRES_HOST:=localhost}
-      port: ${env.POSTGRES_PORT:=5432}
-      db: ${env.POSTGRES_DB:=llamastack}
-      user: ${env.POSTGRES_USER:=llamastack}
-      password: ${env.POSTGRES_PASSWORD:=llamastack}
-      table_name: llamastack_kvstore
-    inference_store:
-      type: postgres
-      host: ${env.POSTGRES_HOST:=localhost}
-      port: ${env.POSTGRES_PORT:=5432}
-      db: ${env.POSTGRES_DB:=llamastack}
-      user: ${env.POSTGRES_USER:=llamastack}
-      password: ${env.POSTGRES_PASSWORD:=llamastack}
+    storage:
+      backends:
+        kv_default:
+          type: kv_postgres
+          host: ${env.POSTGRES_HOST:=localhost}
+          port: ${env.POSTGRES_PORT:=5432}
+          db: ${env.POSTGRES_DB:=llamastack}
+          user: ${env.POSTGRES_USER:=llamastack}
+          password: ${env.POSTGRES_PASSWORD:=llamastack}
+          table_name: ${env.POSTGRES_TABLE_NAME:=llamastack_kvstore}
+        sql_default:
+          type: sql_postgres
+          host: ${env.POSTGRES_HOST:=localhost}
+          port: ${env.POSTGRES_PORT:=5432}
+          db: ${env.POSTGRES_DB:=llamastack}
+          user: ${env.POSTGRES_USER:=llamastack}
+          password: ${env.POSTGRES_PASSWORD:=llamastack}
+      references:
+        metadata:
+          backend: kv_default
+          namespace: registry
+        inference:
+          backend: sql_default
+          table_name: inference_store
     models:
     - metadata:
         embedding_dimension: 768
@@ -137,5 +146,4 @@ data:
       port: 8323
 kind: ConfigMap
 metadata:
-  creationTimestamp: null
   name: llama-stack-config
diff --git a/benchmarking/k8s-benchmark/stack_run_config.yaml b/benchmarking/k8s-benchmark/stack_run_config.yaml
index e2fbfd7a4..2ccaa21aa 100644
--- a/benchmarking/k8s-benchmark/stack_run_config.yaml
+++ b/benchmarking/k8s-benchmark/stack_run_config.yaml
@@ -95,21 +95,30 @@ providers:
   - provider_id: model-context-protocol
     provider_type: remote::model-context-protocol
     config: {}
-metadata_store:
-  type: postgres
-  host: ${env.POSTGRES_HOST:=localhost}
-  port: ${env.POSTGRES_PORT:=5432}
-  db: ${env.POSTGRES_DB:=llamastack}
-  user: ${env.POSTGRES_USER:=llamastack}
-  password: ${env.POSTGRES_PASSWORD:=llamastack}
-  table_name: llamastack_kvstore
-inference_store:
-  type: postgres
-  host: ${env.POSTGRES_HOST:=localhost}
-  port: ${env.POSTGRES_PORT:=5432}
-  db: ${env.POSTGRES_DB:=llamastack}
-  user: ${env.POSTGRES_USER:=llamastack}
-  password: ${env.POSTGRES_PASSWORD:=llamastack}
+storage:
+  backends:
+    kv_default:
+      type: kv_postgres
+      host: ${env.POSTGRES_HOST:=localhost}
+      port: ${env.POSTGRES_PORT:=5432}
+      db: ${env.POSTGRES_DB:=llamastack}
+      user: ${env.POSTGRES_USER:=llamastack}
+      password: ${env.POSTGRES_PASSWORD:=llamastack}
+      table_name: ${env.POSTGRES_TABLE_NAME:=llamastack_kvstore}
+    sql_default:
+      type: sql_postgres
+      host: ${env.POSTGRES_HOST:=localhost}
+      port: ${env.POSTGRES_PORT:=5432}
+      db: ${env.POSTGRES_DB:=llamastack}
+      user: ${env.POSTGRES_USER:=llamastack}
+      password: ${env.POSTGRES_PASSWORD:=llamastack}
+  references:
+    metadata:
+      backend: kv_default
+      namespace: registry
+    inference:
+      backend: sql_default
+      table_name: inference_store
 models:
 - metadata:
     embedding_dimension: 768
diff --git a/client-sdks/stainless/README.md b/client-sdks/stainless/README.md
new file mode 100644
index 000000000..5d391f14c
--- /dev/null
+++ b/client-sdks/stainless/README.md
@@ -0,0 +1,8 @@
+These are the source-of-truth configuration files used by Stainless to generate the client SDKs.
+
+- `openapi.yml`: the OpenAPI specification for the Llama Stack API.
+- `openapi.stainless.yml`: the Stainless _configuration_, which instructs Stainless how to generate the client SDKs.
+
+A small side note: note the `.yml` suffix, which Stainless typically uses for its configuration files.
+
+These files go hand in hand. As of now, only the `openapi.yml` file is automatically generated, using the `run_openapi_generator.sh` script.
\ No newline at end of file
diff --git a/client-sdks/stainless/openapi.stainless.yml b/client-sdks/stainless/openapi.stainless.yml
new file mode 100644
index 000000000..9461be996
--- /dev/null
+++ b/client-sdks/stainless/openapi.stainless.yml
@@ -0,0 +1,610 @@
+# yaml-language-server: $schema=https://app.stainlessapi.com/config-internal.schema.json
+
+organization:
+  # Name of your organization or company, used to determine the name of the client
+  # and headings.
+  name: llama-stack-client
+  docs: https://llama-stack.readthedocs.io/en/latest/
+  contact: llamastack@meta.com
+security:
+  - {}
+  - BearerAuth: []
+security_schemes:
+  BearerAuth:
+    type: http
+    scheme: bearer
+# `targets` define the output targets and their customization options, such as
+# whether to emit the Node SDK and what its package name should be.
+targets:
+  node:
+    package_name: llama-stack-client
+    production_repo: llamastack/llama-stack-client-typescript
+    publish:
+      npm: false
+  python:
+    package_name: llama_stack_client
+    production_repo: llamastack/llama-stack-client-python
+    options:
+      use_uv: true
+    publish:
+      pypi: true
+      project_name: llama_stack_client
+  kotlin:
+    reverse_domain: com.llama_stack_client.api
+    production_repo: null
+    publish:
+      maven: false
+  go:
+    package_name: llama-stack-client
+    production_repo: llamastack/llama-stack-client-go
+    options:
+      enable_v2: true
+      back_compat_use_shared_package: false
+
+# `client_settings` define settings for the API client, such as extra constructor
+# arguments (used for authentication), retry behavior, idempotency, etc.
+client_settings:
+  default_env_prefix: LLAMA_STACK_CLIENT
+  opts:
+    api_key:
+      type: string
+      read_env: LLAMA_STACK_CLIENT_API_KEY
+      auth: { security_scheme: BearerAuth }
+      nullable: true
+
+# `environments` are a map of the name of the environment (e.g. "sandbox",
+# "production") to the corresponding url to use.
+environments:
+  production: http://any-hosted-llama-stack.com
+
+# `pagination` defines [pagination schemes] which provide a template to match
+# endpoints and generate next-page and auto-pagination helpers in the SDKs.
+pagination:
+  - name: datasets_iterrows
+    type: offset
+    request:
+      dataset_id:
+        type: string
+      start_index:
+        type: integer
+        x-stainless-pagination-property:
+          purpose: offset_count_param
+      limit:
+        type: integer
+    response:
+      data:
+        type: array
+        items:
+          type: object
+      next_index:
+        type: integer
+        x-stainless-pagination-property:
+          purpose: offset_count_start_field
+  - name: openai_cursor_page
+    type: cursor
+    request:
+      limit:
+        type: integer
+      after:
+        type: string
+        x-stainless-pagination-property:
+          purpose: next_cursor_param
+    response:
+      data:
+        type: array
+        items: {}
+      has_more:
+        type: boolean
+      last_id:
+        type: string
+        x-stainless-pagination-property:
+          purpose: next_cursor_field
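The `openai_cursor_page` scheme above matches any endpoint whose response carries `data`, `has_more`, and `last_id`, which lets Stainless emit auto-pagination helpers for it. A sketch of a response shape that this scheme would match (values are illustrative; the field names come from the scheme definition itself):

```yaml
# Page returned by a cursor-paginated list endpoint:
data:
  - id: chatcmpl-abc123
  - id: chatcmpl-def456
has_more: true
last_id: chatcmpl-def456   # fed back as the `after` query param to fetch the next page
```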
+# `resources` define the structure and organization for your API, such as how
+# methods and models are grouped together and accessed. See the [configuration
+# guide] for more information.
+#
+# [configuration guide]:
+#   https://app.stainlessapi.com/docs/guides/configure#resources
+resources:
+  $shared:
+    models:
+      agent_config: AgentConfig
+      interleaved_content_item: InterleavedContentItem
+      interleaved_content: InterleavedContent
+      param_type: ParamType
+      safety_violation: SafetyViolation
+      sampling_params: SamplingParams
+      scoring_result: ScoringResult
+      message: Message
+      user_message: UserMessage
+      completion_message: CompletionMessage
+      tool_response_message: ToolResponseMessage
+      system_message: SystemMessage
+      tool_call: ToolCall
+      query_result: RAGQueryResult
+      document: RAGDocument
+      query_config: RAGQueryConfig
+      response_format: ResponseFormat
+  toolgroups:
+    models:
+      tool_group: ToolGroup
+      list_tool_groups_response: ListToolGroupsResponse
+    methods:
+      register: post /v1/toolgroups
+      get: get /v1/toolgroups/{toolgroup_id}
+      list: get /v1/toolgroups
+      unregister: delete /v1/toolgroups/{toolgroup_id}
+  tools:
+    methods:
+      get: get /v1/tools/{tool_name}
+      list:
+        endpoint: get /v1/tools
+        paginated: false
+
+  tool_runtime:
+    models:
+      tool_def: ToolDef
+      tool_invocation_result: ToolInvocationResult
+    methods:
+      list_tools:
+        endpoint: get /v1/tool-runtime/list-tools
+        paginated: false
+      invoke_tool: post /v1/tool-runtime/invoke
+    subresources:
+      rag_tool:
+        methods:
+          insert: post /v1/tool-runtime/rag-tool/insert
+          query: post /v1/tool-runtime/rag-tool/query
+
+  responses:
+    models:
+      response_object_stream: OpenAIResponseObjectStream
+      response_object: OpenAIResponseObject
+    methods:
+      create:
+        type: http
+        endpoint: post /v1/responses
+        streaming:
+          stream_event_model: responses.response_object_stream
+          param_discriminator: stream
+      retrieve: get /v1/responses/{response_id}
+      list:
+        type: http
+        endpoint: get /v1/responses
+      delete:
+        type: http
+        endpoint: delete /v1/responses/{response_id}
+    subresources:
+      input_items:
+        methods:
+          list:
+            type: http
+            endpoint: get /v1/responses/{response_id}/input_items
+
+  conversations:
+    models:
+      conversation_object: Conversation
+    methods:
+      create:
+        type: http
+        endpoint: post /v1/conversations
+      retrieve: get /v1/conversations/{conversation_id}
+      update:
+        type: http
+        endpoint: post /v1/conversations/{conversation_id}
+      delete:
+        type: http
+        endpoint: delete /v1/conversations/{conversation_id}
+    subresources:
+      items:
+        methods:
+          get:
+            type: http
+            endpoint: get /v1/conversations/{conversation_id}/items/{item_id}
+          list:
+            type: http
+            endpoint: get /v1/conversations/{conversation_id}/items
+          create:
+            type: http
+            endpoint: post /v1/conversations/{conversation_id}/items
+
+  inspect:
+    models:
+      healthInfo: HealthInfo
+      providerInfo: ProviderInfo
+      routeInfo: RouteInfo
+      versionInfo: VersionInfo
+    methods:
+      health: get /v1/health
+      version: get /v1/version
+
+  embeddings:
+    models:
+      create_embeddings_response: OpenAIEmbeddingsResponse
+    methods:
+      create: post /v1/embeddings
+
+  chat:
+    models:
+      chat_completion_chunk: OpenAIChatCompletionChunk
+    subresources:
+      completions:
+        methods:
+          create:
+            type: http
+            endpoint: post /v1/chat/completions
+            streaming:
+              stream_event_model: chat.chat_completion_chunk
+              param_discriminator: stream
+          list:
+            type: http
+            endpoint: get /v1/chat/completions
+          retrieve:
+            type: http
+            endpoint: get /v1/chat/completions/{completion_id}
+  completions:
+    methods:
+      create:
+        type: http
+        endpoint: post /v1/completions
+        streaming:
+          param_discriminator: stream
+
+  vector_io:
+    models:
+      queryChunksResponse: QueryChunksResponse
+    methods:
+      insert: post /v1/vector-io/insert
+      query: post /v1/vector-io/query
+
+  vector_stores:
+    models:
+      vector_store: VectorStoreObject
+      list_vector_stores_response: VectorStoreListResponse
+      vector_store_delete_response: VectorStoreDeleteResponse
+      vector_store_search_response: VectorStoreSearchResponsePage
+    methods:
+      create: post /v1/vector_stores
+      list:
+        endpoint: get /v1/vector_stores
+      retrieve: get /v1/vector_stores/{vector_store_id}
+      update: post /v1/vector_stores/{vector_store_id}
+      delete: delete /v1/vector_stores/{vector_store_id}
+      search: post /v1/vector_stores/{vector_store_id}/search
+    subresources:
+      files:
+        models:
+          vector_store_file: VectorStoreFileObject
+        methods:
+          list: get /v1/vector_stores/{vector_store_id}/files
+          retrieve: get /v1/vector_stores/{vector_store_id}/files/{file_id}
+          update: post /v1/vector_stores/{vector_store_id}/files/{file_id}
+          delete: delete /v1/vector_stores/{vector_store_id}/files/{file_id}
+          create: post /v1/vector_stores/{vector_store_id}/files
+          content: get /v1/vector_stores/{vector_store_id}/files/{file_id}/content
+      file_batches:
+        models:
+          vector_store_file_batches: VectorStoreFileBatchObject
+          list_vector_store_files_in_batch_response: VectorStoreFilesListInBatchResponse
+        methods:
+          create: post /v1/vector_stores/{vector_store_id}/file_batches
+          retrieve: get /v1/vector_stores/{vector_store_id}/file_batches/{batch_id}
+          list_files: get /v1/vector_stores/{vector_store_id}/file_batches/{batch_id}/files
+          cancel: post /v1/vector_stores/{vector_store_id}/file_batches/{batch_id}/cancel
+
+  models:
+    models:
+      model: Model
+      list_models_response: ListModelsResponse
+    methods:
+      retrieve: get /v1/models/{model_id}
+      list:
+        endpoint: get /v1/models
+        paginated: false
+      register: post /v1/models
+      unregister: delete /v1/models/{model_id}
+    subresources:
+      openai:
+        methods:
+          list:
+            endpoint: get /v1/models
+            paginated: false
+
+  providers:
+    models:
+      list_providers_response: ListProvidersResponse
+    methods:
+      list:
+        endpoint: get /v1/providers
+        paginated: false
+      retrieve: get /v1/providers/{provider_id}
+
+  routes:
+    models:
+      list_routes_response: ListRoutesResponse
+    methods:
+      list:
+        endpoint: get /v1/inspect/routes
+        paginated: false
+
+  moderations:
+    models:
+      create_response: ModerationObject
+    methods:
+      create: post /v1/moderations
+
+  safety:
+    models:
+      run_shield_response: RunShieldResponse
+    methods:
+      run_shield: post /v1/safety/run-shield
+
+  shields:
+    models:
+      shield: Shield
+      list_shields_response: ListShieldsResponse
+    methods:
+      retrieve: get /v1/shields/{identifier}
+      list:
+        endpoint: get /v1/shields
+        paginated: false
+      register: post /v1/shields
+      delete: delete /v1/shields/{identifier}
+
+  synthetic_data_generation:
+    models:
+      syntheticDataGenerationResponse: SyntheticDataGenerationResponse
+    methods:
+      generate: post /v1/synthetic-data-generation/generate
+
+  telemetry:
+    models:
+      span_with_status: SpanWithStatus
+      trace: Trace
+      query_spans_response: QuerySpansResponse
+      event: Event
+      query_condition: QueryCondition
+    methods:
+      query_traces:
+        endpoint: post /v1alpha/telemetry/traces
+        skip_test_reason: 'unsupported query params in java / kotlin'
+      get_span_tree: post /v1alpha/telemetry/spans/{span_id}/tree
+      query_spans:
+        endpoint: post /v1alpha/telemetry/spans
+        skip_test_reason: 'unsupported query params in java / kotlin'
+      query_metrics:
+        endpoint: post /v1alpha/telemetry/metrics/{metric_name}
+        skip_test_reason: 'unsupported query params in java / kotlin'
+      # log_event: post /v1alpha/telemetry/events
+      save_spans_to_dataset: post /v1alpha/telemetry/spans/export
+      get_span: get /v1alpha/telemetry/traces/{trace_id}/spans/{span_id}
+      get_trace: get /v1alpha/telemetry/traces/{trace_id}
+
+  scoring:
+    methods:
+      score: post /v1/scoring/score
+      score_batch: post /v1/scoring/score-batch
+  scoring_functions:
+    methods:
+      retrieve: get /v1/scoring-functions/{scoring_fn_id}
+      list:
+        endpoint: get /v1/scoring-functions
+        paginated: false
+      register: post /v1/scoring-functions
+    models:
+      scoring_fn: ScoringFn
+      scoring_fn_params: ScoringFnParams
+      list_scoring_functions_response: ListScoringFunctionsResponse
+
+  benchmarks:
+    methods:
+      retrieve: get /v1alpha/eval/benchmarks/{benchmark_id}
+      list:
+        endpoint: get /v1alpha/eval/benchmarks
+        paginated: false
+      register: post /v1alpha/eval/benchmarks
+    models:
+      benchmark: Benchmark
+      list_benchmarks_response: ListBenchmarksResponse
+
+  files:
+    methods:
+      create: post /v1/files
+      list: get /v1/files
+      retrieve: get /v1/files/{file_id}
+      delete: delete /v1/files/{file_id}
+      content: get /v1/files/{file_id}/content
+    models:
+      file: OpenAIFileObject
+      list_files_response: ListOpenAIFileResponse
+      delete_file_response: OpenAIFileDeleteResponse
+
+  alpha:
+    subresources:
+      inference:
+        methods:
+          rerank: post /v1alpha/inference/rerank
+
+      post_training:
+        models:
+          algorithm_config: AlgorithmConfig
+          post_training_job: PostTrainingJob
+          list_post_training_jobs_response: ListPostTrainingJobsResponse
+        methods:
+          preference_optimize: post /v1alpha/post-training/preference-optimize
+          supervised_fine_tune: post /v1alpha/post-training/supervised-fine-tune
+        subresources:
+          job:
+            methods:
+              artifacts: get /v1alpha/post-training/job/artifacts
+              cancel: post /v1alpha/post-training/job/cancel
+              status: get /v1alpha/post-training/job/status
+              list:
+                endpoint: get /v1alpha/post-training/jobs
+                paginated: false
+
+      eval:
+        methods:
+          evaluate_rows: post /v1alpha/eval/benchmarks/{benchmark_id}/evaluations
+          run_eval: post /v1alpha/eval/benchmarks/{benchmark_id}/jobs
+          evaluate_rows_alpha: post /v1alpha/eval/benchmarks/{benchmark_id}/evaluations
+          run_eval_alpha: post /v1alpha/eval/benchmarks/{benchmark_id}/jobs
+
+        subresources:
+          jobs:
+            methods:
+              cancel: delete /v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}
+              status: get /v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}
+              retrieve: get /v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result
+        models:
+          evaluate_response: EvaluateResponse
+          benchmark_config: BenchmarkConfig
+          job: Job
+
+      agents:
+        methods:
+          create: post /v1alpha/agents
+          list: get /v1alpha/agents
+          retrieve: get /v1alpha/agents/{agent_id}
+          delete: delete /v1alpha/agents/{agent_id}
+        models:
+          inference_step: InferenceStep
+          tool_execution_step: ToolExecutionStep
+          tool_response: ToolResponse
+          shield_call_step: ShieldCallStep
+          memory_retrieval_step: MemoryRetrievalStep
+        subresources:
+          session:
+            models:
+              session: Session
+            methods:
+              list: get /v1alpha/agents/{agent_id}/sessions
+              create: post /v1alpha/agents/{agent_id}/session
+              delete: delete /v1alpha/agents/{agent_id}/session/{session_id}
+              retrieve: get /v1alpha/agents/{agent_id}/session/{session_id}
+          steps:
+            methods:
+              retrieve: get /v1alpha/agents/{agent_id}/session/{session_id}/turn/{turn_id}/step/{step_id}
+          turn:
+            models:
+              turn: Turn
+              turn_response_event: AgentTurnResponseEvent
+              agent_turn_response_stream_chunk: AgentTurnResponseStreamChunk
+            methods:
+              create:
+                type: http
+                endpoint: post /v1alpha/agents/{agent_id}/session/{session_id}/turn
+                streaming:
+                  stream_event_model: alpha.agents.turn.agent_turn_response_stream_chunk
+                  param_discriminator: stream
+              retrieve: get /v1alpha/agents/{agent_id}/session/{session_id}/turn/{turn_id}
+              resume:
+                type: http
+                endpoint: post /v1alpha/agents/{agent_id}/session/{session_id}/turn/{turn_id}/resume
+                streaming:
+                  stream_event_model: alpha.agents.turn.agent_turn_response_stream_chunk
+                  param_discriminator: stream
+
+  beta:
+    subresources:
+      datasets:
+        models:
+          list_datasets_response: ListDatasetsResponse
+        methods:
+          register: post /v1beta/datasets
+          retrieve: get /v1beta/datasets/{dataset_id}
+          list:
+            endpoint: get /v1beta/datasets
+            paginated: false
+          unregister: delete /v1beta/datasets/{dataset_id}
+          iterrows: get /v1beta/datasetio/iterrows/{dataset_id}
+          appendrows: post /v1beta/datasetio/append-rows/{dataset_id}
+
+settings:
+  license: MIT
+  unwrap_response_fields: [ data ]
+
+openapi:
+  transformations:
+    - command: renameValue
+      reason: pydantic reserved name
+      args:
+        filter:
+          only:
+            - '$.components.schemas.InferenceStep.properties.model_response'
+        rename:
+          python:
+            property_name: 'inference_model_response'
+
+    # - command: renameValue
+    #   reason: pydantic reserved name
+    #   args:
+    #     filter:
+    #       only:
+    #         - '$.components.schemas.Model.properties.model_type'
+    #     rename:
+    #       python:
+    #         property_name: 'type'
+    - command: mergeObject
+      reason: Better return_type using enum
+      args:
+        target:
+          - '$.components.schemas'
+        object:
+          ReturnType:
+            additionalProperties: false
+            properties:
+              type:
+                enum:
+                  - string
+                  - number
+                  - boolean
+                  - array
+                  - object
+                  - json
+                  - union
+                  - chat_completion_input
+                  - completion_input
+                  - agent_turn_input
+            required:
+              - type
+            type: object
+    - command: replaceProperties
+      reason: Replace return type properties with better model (see above)
+      args:
+        filter:
+          only:
+            - '$.components.schemas.ScoringFn.properties.return_type'
+            - '$.components.schemas.RegisterScoringFunctionRequest.properties.return_type'
+        value:
+          $ref: '#/components/schemas/ReturnType'
+    - command: oneOfToAnyOf
+      reason: Prism (mock server) doesn't like one of our requests as it technically matches multiple variants
+    - reason: For better names
+      command: extractToRefs
+      args:
+        ref:
+          target: '$.components.schemas.ToolCallDelta.properties.tool_call'
+          name: '#/components/schemas/ToolCallOrString'
+
+# `readme` is used to configure the code snippets that will be rendered in the
+# README.md of various SDKs. In particular, you can change the `headline`
+# snippet's endpoint and the arguments to call it with.
+readme:
+  example_requests:
+    default:
+      type: request
+      endpoint: post /v1/chat/completions
+      params: &ref_0 {}
+    headline:
+      type: request
+      endpoint: post /v1/models
+      params: *ref_0
+    pagination:
+      type: request
+      endpoint: post /v1/chat/completions
+      params: {}
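The `extractToRefs` transformation above pulls the inline union on `ToolCallDelta.tool_call` out into a named schema so generated SDKs get a readable type name. A rough before/after sketch (the inline `oneOf` shape is an assumption; only the JSONPath target and the new ref name come from the config):

```yaml
# Before (assumed inline union):
ToolCallDelta:
  properties:
    tool_call:
      oneOf:
        - type: string
        - $ref: '#/components/schemas/ToolCall'

# After extractToRefs:
ToolCallDelta:
  properties:
    tool_call:
      $ref: '#/components/schemas/ToolCallOrString'
ToolCallOrString:
  oneOf:
    - type: string
    - $ref: '#/components/schemas/ToolCall'
```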
diff --git a/client-sdks/stainless/openapi.yml b/client-sdks/stainless/openapi.yml
new file mode 100644
index 000000000..98a309f12
--- /dev/null
+++ b/client-sdks/stainless/openapi.yml
@@ -0,0 +1,13322 @@
+openapi: 3.1.0
+info:
+  title: >-
+    Llama Stack Specification - Stable & Experimental APIs
+  version: v1
+  description: >-
+    This is the specification of the Llama Stack that provides
+    a set of endpoints and their corresponding interfaces that are
+    tailored to best leverage Llama Models.
+
+    **🔗 COMBINED**: This specification includes both stable production-ready APIs
+    and experimental pre-release APIs. Use stable APIs for production deployments
+    and experimental APIs for testing new features.
+servers:
+  - url: http://any-hosted-llama-stack.com
+paths:
+  /v1/chat/completions:
+    get:
+      responses:
+        '200':
+          description: A ListOpenAIChatCompletionResponse.
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/ListOpenAIChatCompletionResponse'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
+      tags:
+        - Inference
+      summary: List chat completions.
+      description: List chat completions.
+      parameters:
+        - name: after
+          in: query
+          description: >-
+            The ID of the last chat completion to return.
+          required: false
+          schema:
+            type: string
+        - name: limit
+          in: query
+          description: >-
+            The maximum number of chat completions to return.
+          required: false
+          schema:
+            type: integer
+        - name: model
+          in: query
+          description: The model to filter by.
+          required: false
+          schema:
+            type: string
+        - name: order
+          in: query
+          description: >-
+            The order to sort the chat completions by: "asc" or "desc". Defaults to
+            "desc".
+          required: false
+          schema:
+            $ref: '#/components/schemas/Order'
+      deprecated: false
+    post:
+      responses:
+        '200':
+          description: An OpenAIChatCompletion.
+          content:
+            application/json:
+              schema:
+                oneOf:
+                  - $ref: '#/components/schemas/OpenAIChatCompletion'
+                  - $ref: '#/components/schemas/OpenAIChatCompletionChunk'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
+      tags:
+        - Inference
+      summary: Create chat completions.
+      description: >-
+        Create chat completions.

+        Generate an OpenAI-compatible chat completion for the given messages using
+        the specified model.
+      parameters: []
+      requestBody:
+        content:
+          application/json:
+            schema:
+              $ref: '#/components/schemas/OpenAIChatCompletionRequestWithExtraBody'
+        required: true
+      deprecated: false
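The create endpoint above takes an OpenAI-compatible request body (`OpenAIChatCompletionRequestWithExtraBody`). A minimal sketch of such a request, assuming the standard OpenAI chat-completions fields (the model id is illustrative):

```yaml
model: meta-llama/Llama-3.2-3B-Instruct
messages:
  - role: user
    content: What is the capital of France?
stream: false   # set true to receive OpenAIChatCompletionChunk events instead
```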
+  /v1/chat/completions/{completion_id}:
+    get:
+      responses:
+        '200':
+          description: An OpenAICompletionWithInputMessages.
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/OpenAICompletionWithInputMessages'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
+      tags:
+        - Inference
+      summary: Get chat completion.
+      description: >-
+        Get chat completion.

+        Describe a chat completion by its ID.
+      parameters:
+        - name: completion_id
+          in: path
+          description: ID of the chat completion.
+          required: true
+          schema:
+            type: string
+      deprecated: false
+  /v1/completions:
+    post:
+      responses:
+        '200':
+          description: An OpenAICompletion.
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/OpenAICompletion'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
+      tags:
+        - Inference
+      summary: Create completion.
+      description: >-
+        Create completion.

+        Generate an OpenAI-compatible completion for the given prompt using the specified
+        model.
+      parameters: []
+      requestBody:
+        content:
+          application/json:
+            schema:
+              $ref: '#/components/schemas/OpenAICompletionRequestWithExtraBody'
+        required: true
+      deprecated: false
+  /v1/conversations:
+    post:
+      responses:
+        '200':
+          description: The created conversation object.
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/Conversation'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
+      tags:
+        - Conversations
+      summary: Create a conversation.
+      description: Create a conversation.
+      parameters: []
+      requestBody:
+        content:
+          application/json:
+            schema:
+              $ref: '#/components/schemas/CreateConversationRequest'
+        required: true
+      deprecated: false
+  /v1/conversations/{conversation_id}:
+    get:
+      responses:
+        '200':
+          description: The conversation object.
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/Conversation'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
+      tags:
+        - Conversations
+      summary: Retrieve a conversation.
+      description: >-
+        Retrieve a conversation.

+        Get a conversation with the given ID.
+      parameters:
+        - name: conversation_id
+          in: path
+          description: The conversation identifier.
+          required: true
+          schema:
+            type: string
+      deprecated: false
+    post:
+      responses:
+        '200':
+          description: The updated conversation object.
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/Conversation'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
+      tags:
+        - Conversations
+      summary: Update a conversation.
+      description: >-
+        Update a conversation.

+        Update a conversation's metadata with the given ID.
+      parameters:
+        - name: conversation_id
+          in: path
+          description: The conversation identifier.
+          required: true
+          schema:
+            type: string
+      requestBody:
+        content:
+          application/json:
+            schema:
+              $ref: '#/components/schemas/UpdateConversationRequest'
+        required: true
+      deprecated: false
+    delete:
+      responses:
+        '200':
+          description: The deleted conversation resource.
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/ConversationDeletedResource'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
+      tags:
+        - Conversations
+      summary: Delete a conversation.
+      description: >-
+        Delete a conversation.

+        Delete a conversation with the given ID.
+      parameters:
+        - name: conversation_id
+          in: path
+          description: The conversation identifier.
+          required: true
+          schema:
+            type: string
+      deprecated: false
+  /v1/conversations/{conversation_id}/items:
+    get:
+      responses:
+        '200':
+          description: List of conversation items.
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/ConversationItemList'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
+      tags:
+        - Conversations
+      summary: List items.
+      description: >-
+        List items.

+        List items in the conversation.
+      parameters:
+        - name: conversation_id
+          in: path
+          description: The conversation identifier.
+          required: true
+          schema:
+            type: string
+        - name: after
+          in: query
+          description: >-
+            An item ID to list items after, used in pagination.
+          required: true
+          schema:
+            oneOf:
+              - type: string
+              - type: object
+                title: NotGiven
+                description: >-
+                  A sentinel singleton class used to distinguish omitted keyword arguments
+                  from those passed in with the value None (which may have different
+                  behavior).

+                  For example:

+                  ```py

+                  def get(timeout: Union[int, NotGiven, None] = NotGiven()) -> Response:
+                      ...

+                  get(timeout=1)  # 1s timeout

+                  get(timeout=None)  # No timeout

+                  get()  # Default timeout behavior, which may not be statically known
+                  at the method definition.

+                  ```
+        - name: include
+          in: query
+          description: >-
+            Specify additional output data to include in the response.
+          required: true
+          schema:
+            oneOf:
+              - type: array
+                items:
+                  type: string
+                  enum:
+                    - code_interpreter_call.outputs
+                    - computer_call_output.output.image_url
+                    - file_search_call.results
+                    - message.input_image.image_url
+                    - message.output_text.logprobs
+                    - reasoning.encrypted_content
+              - type: object
+                title: NotGiven
+                description: >-
+                  A sentinel singleton class used to distinguish omitted keyword arguments
+                  from those passed in with the value None (which may have different
+                  behavior).

+                  For example:

+                  ```py

+                  def get(timeout: Union[int, NotGiven, None] = NotGiven()) -> Response:
+                      ...

+                  get(timeout=1)  # 1s timeout

+                  get(timeout=None)  # No timeout

+                  get()  # Default timeout behavior, which may not be statically known
+                  at the method definition.

+                  ```
+        - name: limit
+          in: query
+          description: >-
+            A limit on the number of objects to be returned (1-100, default 20).
+          required: true
+          schema:
+            oneOf:
+              - type: integer
+              - type: object
+                title: NotGiven
+                description: >-
+                  A sentinel singleton class used to distinguish omitted keyword arguments
+                  from those passed in with the value None (which may have different
+                  behavior).

+                  For example:

+                  ```py

+                  def get(timeout: Union[int, NotGiven, None] = NotGiven()) -> Response:
+                      ...

+                  get(timeout=1)  # 1s timeout

+                  get(timeout=None)  # No timeout

+                  get()  # Default timeout behavior, which may not be statically known
+                  at the method definition.

+                  ```
+        - name: order
+          in: query
+          description: >-
+            The order to return items in (asc or desc, default desc).
+          required: true
+          schema:
+            oneOf:
+              - type: string
+                enum:
+                  - asc
+                  - desc
+              - type: object
+                title: NotGiven
+                description: >-
+                  A sentinel singleton class used to distinguish omitted keyword arguments
+                  from those passed in with the value None (which may have different
+                  behavior).

+                  For example:

+                  ```py

+                  def get(timeout: Union[int, NotGiven, None] = NotGiven()) -> Response:
+                      ...

+                  get(timeout=1)  # 1s timeout

+                  get(timeout=None)  # No timeout

+                  get()  # Default timeout behavior, which may not be statically known
+                  at the method definition.
+                  ```
+      deprecated: false
+    post:
+      responses:
+        '200':
+          description: List of created items.
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/ConversationItemList'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
+      tags:
+        - Conversations
+      summary: Create items.
+      description: >-
+        Create items.

+        Create items in the conversation.
+      parameters:
+        - name: conversation_id
+          in: path
+          description: The conversation identifier.
+          required: true
+          schema:
+            type: string
+      requestBody:
+        content:
+          application/json:
+            schema:
+              $ref: '#/components/schemas/AddItemsRequest'
+        required: true
+      deprecated: false
+  /v1/conversations/{conversation_id}/items/{item_id}:
+    get:
+      responses:
+        '200':
+          description: The conversation item.
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/ConversationItem'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
+      tags:
+        - Conversations
+      summary: Retrieve an item.
+      description: >-
+        Retrieve an item.

+        Retrieve a conversation item.
+      parameters:
+        - name: conversation_id
+          in: path
+          description: The conversation identifier.
+          required: true
+          schema:
+            type: string
+        - name: item_id
+          in: path
+          description: The item identifier.
+          required: true
+          schema:
+            type: string
+      deprecated: false
+    delete:
+      responses:
+        '200':
+          description: The deleted item resource.
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/ConversationItemDeletedResource'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
+      tags:
+        - Conversations
+      summary: Delete an item.
+      description: >-
+        Delete an item.

+        Delete a conversation item.
+      parameters:
+        - name: conversation_id
+          in: path
+          description: The conversation identifier.
+          required: true
+          schema:
+            type: string
+        - name: item_id
+          in: path
+          description: The item identifier.
+          required: true
+          schema:
+            type: string
+      deprecated: false
+  /v1/embeddings:
+    post:
+      responses:
+        '200':
+          description: >-
+            An OpenAIEmbeddingsResponse containing the embeddings.
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/OpenAIEmbeddingsResponse'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
+      tags:
+        - Inference
+      summary: Create embeddings.
+      description: >-
+        Create embeddings.

+        Generate OpenAI-compatible embeddings for the given input using the specified
+        model.
+      parameters: []
+      requestBody:
+        content:
+          application/json:
+            schema:
+              $ref: '#/components/schemas/OpenAIEmbeddingsRequestWithExtraBody'
+        required: true
+      deprecated: false
+  /v1/files:
+    get:
+      responses:
+        '200':
+          description: >-
+            A ListOpenAIFileResponse containing the list of files.
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/ListOpenAIFileResponse'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
+      tags:
+        - Files
+      summary: List files.
+      description: >-
+        List files.

+        Returns a list of files that belong to the user's organization.
+      parameters:
+        - name: after
+          in: query
+          description: >-
+            A cursor for use in pagination. `after` is an object ID that defines your
+            place in the list. For instance, if you make a list request and receive
+            100 objects, ending with obj_foo, your subsequent call can include after=obj_foo
+            in order to fetch the next page of the list.
+          required: false
+          schema:
+            type: string
+        - name: limit
+          in: query
+          description: >-
+            A limit on the number of objects to be returned. Limit can range between
+            1 and 10,000, and the default is 10,000.
+          required: false
+          schema:
+            type: integer
+        - name: order
+          in: query
+          description: >-
+            Sort order by the `created_at` timestamp of the objects. `asc` for ascending
+            order and `desc` for descending order.
+          required: false
+          schema:
+            $ref: '#/components/schemas/Order'
+        - name: purpose
+          in: query
+          description: >-
+            Only return files with the given purpose.
+          required: false
+          schema:
+            $ref: '#/components/schemas/OpenAIFilePurpose'
+      deprecated: false
+    post:
+      responses:
+        '200':
+          description: >-
+            An OpenAIFileObject representing the uploaded file.
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/OpenAIFileObject'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
+      tags:
+        - Files
+      summary: Upload file.
+      description: >-
+        Upload file.

+        Upload a file that can be used across various endpoints.

+        The file upload should be a multipart form request with:

+        - file: The File object (not file name) to be uploaded.

+        - purpose: The intended purpose of the uploaded file.

+        - expires_after: Optional form values describing expiration for the file.
+      parameters: []
+      requestBody:
+        content:
+          multipart/form-data:
+            schema:
+              type: object
+              properties:
+                file:
+                  type: string
+                  format: binary
+                purpose:
+                  $ref: '#/components/schemas/OpenAIFilePurpose'
+                expires_after:
+                  $ref: '#/components/schemas/ExpiresAfter'
+              required:
+                - file
+                - purpose
+        required: true
+      deprecated: false
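The upload endpoint above is a multipart form request. A rough sketch of the three form parts, rendered as YAML for readability (the `purpose` value and the `expires_after` fields mirror the OpenAI Files API and are assumptions here; only the part names come from the schema above):

```yaml
file: <binary contents of training_data.jsonl>   # the File object itself, not the file name
purpose: batch                                   # an OpenAIFilePurpose value (assumed)
expires_after:                                   # optional; assumed OpenAI-style shape
  anchor: created_at
  seconds: 86400
```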
+  /v1/files/{file_id}:
+    get:
+      responses:
+        '200':
+          description: >-
+            An OpenAIFileObject containing file information.
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/OpenAIFileObject'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
+      tags:
+        - Files
+      summary: Retrieve file.
+      description: >-
+        Retrieve file.

+        Returns information about a specific file.
+      parameters:
+        - name: file_id
+          in: path
+          description: >-
+            The ID of the file to use for this request.
+          required: true
+          schema:
+            type: string
+      deprecated: false
+    delete:
+      responses:
+        '200':
+          description: >-
+            An OpenAIFileDeleteResponse indicating successful deletion.
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/OpenAIFileDeleteResponse'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
+      tags:
+        - Files
+      summary: Delete file.
+      description: Delete file.
+      parameters:
+        - name: file_id
+          in: path
+          description: >-
+            The ID of the file to use for this request.
+          required: true
+          schema:
+            type: string
+      deprecated: false
+  /v1/files/{file_id}/content:
+    get:
+      responses:
+        '200':
+          description: >-
+            The raw file content as a binary response.
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/Response'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
+      tags:
+        - Files
+      summary: Retrieve file content.
+      description: >-
+        Retrieve file content.

+        Returns the contents of the specified file.
+      parameters:
+        - name: file_id
+          in: path
+          description: >-
+            The ID of the file to use for this request.
+          required: true
+          schema:
+            type: string
+      deprecated: false
+  /v1/health:
+    get:
+      responses:
+        '200':
+          description: >-
+            Health information indicating if the service is operational.
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/HealthInfo'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
+      tags:
+        - Inspect
+      summary: Get health status.
+      description: >-
+        Get health status.

+        Get the current health status of the service.
+      parameters: []
+      deprecated: false
+  /v1/inspect/routes:
+    get:
+      responses:
+        '200':
+          description: >-
+            Response containing information about all available routes.
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/ListRoutesResponse'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
+      tags:
+        - Inspect
+      summary: List routes.
+      description: >-
+        List routes.

+        List all available API routes with their methods and implementing providers.
+      parameters: []
+      deprecated: false
+  /v1/models:
+    get:
+      responses:
+        '200':
+          description: A ListModelsResponse.
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/ListModelsResponse'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
+      tags:
+        - Models
+      summary: List all models.
+      description: List all models.
+      parameters: []
+      deprecated: false
+    post:
+      responses:
+        '200':
+          description: A Model.
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/Model'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
+      tags:
+        - Models
+      summary: Register model.
+      description: >-
+        Register model.

+        Register a model.
+      parameters: []
+      requestBody:
+        content:
+          application/json:
+            schema:
+              $ref: '#/components/schemas/RegisterModelRequest'
+        required: true
+      deprecated: false
+  /v1/models/{model_id}:
+    get:
+      responses:
+        '200':
+          description: A Model.
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/Model'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
+      tags:
+        - Models
+      summary: Get model.
+      description: >-
+        Get model.

+        Get a model by its identifier.
+      parameters:
+        - name: model_id
+          in: path
+          description: The identifier of the model to get.
+          required: true
+          schema:
+            type: string
+      deprecated: false
+    delete:
+      responses:
+        '200':
+          description: OK
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
+      tags:
+        - Models
+      summary: Unregister model.
+      description: >-
+        Unregister model.

+        Unregister a model.
+      parameters:
+        - name: model_id
+          in: path
+          description: >-
+            The identifier of the model to unregister.
+          required: true
+          schema:
+            type: string
+      deprecated: false
+  /v1/moderations:
+    post:
+      responses:
+        '200':
+          description: A moderation object.
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/ModerationObject'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
+      tags:
+        - Safety
+      summary: Create moderation.
+      description: >-
+        Create moderation.

+        Classifies if text and/or image inputs are potentially harmful.
+      parameters: []
+      requestBody:
+        content:
+          application/json:
+            schema:
+              $ref: '#/components/schemas/RunModerationRequest'
+        required: true
+      deprecated: false
+  /v1/prompts:
+    get:
+      responses:
+        '200':
+          description: >-
+            A ListPromptsResponse containing all prompts.
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/ListPromptsResponse'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
+      tags:
+        - Prompts
+      summary: List all prompts.
+      description: List all prompts.
+      parameters: []
+      deprecated: false
+    post:
+      responses:
+        '200':
+          description: The created Prompt resource.
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/Prompt'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
+      tags:
+        - Prompts
+      summary: Create prompt.
+      description: >-
+        Create prompt.

+        Create a new prompt.
+      parameters: []
+      requestBody:
+        content:
+          application/json:
+            schema:
+              $ref: '#/components/schemas/CreatePromptRequest'
+        required: true
+      deprecated: false
+  /v1/prompts/{prompt_id}:
+    get:
+      responses:
+        '200':
+          description: A Prompt resource.
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/Prompt'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
+      tags:
+        - Prompts
+      summary: Get prompt.
+      description: >-
+        Get prompt.

+        Get a prompt by its identifier and optional version.
+      parameters:
+        - name: prompt_id
+          in: path
+          description: The identifier of the prompt to get.
+          required: true
+          schema:
+            type: string
+        - name: version
+          in: query
+          description: >-
+            The version of the prompt to get (defaults to latest).
+          required: false
+          schema:
+            type: integer
+      deprecated: false
+    post:
+      responses:
+        '200':
+          description: >-
+            The updated Prompt resource with incremented version.
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/Prompt'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
+      tags:
+        - Prompts
+      summary: Update prompt.
+      description: >-
+        Update prompt.

+        Update an existing prompt (increments version).
+      parameters:
+        - name: prompt_id
+          in: path
+          description: The identifier of the prompt to update.
+          required: true
+          schema:
+            type: string
+      requestBody:
+        content:
+          application/json:
+            schema:
+              $ref: '#/components/schemas/UpdatePromptRequest'
+        required: true
+      deprecated: false
+    delete:
+      responses:
+        '200':
+          description: OK
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
+      tags:
+        - Prompts
+      summary: Delete prompt.
+      description: >-
+        Delete prompt.

+        Delete a prompt.
+      parameters:
+        - name: prompt_id
+          in: path
+          description: The identifier of the prompt to delete.
+          required: true
+          schema:
+            type: string
+      deprecated: false
+  /v1/prompts/{prompt_id}/set-default-version:
+    post:
+      responses:
+        '200':
+          description: >-
+            The prompt with the specified version now set as default.
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/Prompt'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
+      tags:
+        - Prompts
+      summary: Set prompt version.
+      description: >-
+        Set prompt version.

+        Set which version of a prompt should be the default in get_prompt (latest).
+      parameters:
+        - name: prompt_id
+          in: path
+          description: The identifier of the prompt.
+          required: true
+          schema:
+            type: string
+      requestBody:
+        content:
+          application/json:
+            schema:
+              $ref: '#/components/schemas/SetDefaultVersionRequest'
+        required: true
+      deprecated: false
+  /v1/prompts/{prompt_id}/versions:
+    get:
+      responses:
+        '200':
+          description: >-
+            A ListPromptsResponse containing all versions of the prompt.
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/ListPromptsResponse'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
+      tags:
+        - Prompts
+      summary: List prompt versions.
+      description: >-
+        List prompt versions.

+        List all versions of a specific prompt.
+      parameters:
+        - name: prompt_id
+          in: path
+          description: >-
+            The identifier of the prompt to list versions for.
+          required: true
+          schema:
+            type: string
+      deprecated: false
+  /v1/providers:
+    get:
+      responses:
+        '200':
+          description: >-
+            A ListProvidersResponse containing information about all providers.
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/ListProvidersResponse'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
+      tags:
+        - Providers
+      summary: List providers.
+      description: >-
+        List providers.

+        List all available providers.
+      parameters: []
+      deprecated: false
+  /v1/providers/{provider_id}:
+    get:
+      responses:
+        '200':
+          description: >-
+            A ProviderInfo object containing the provider's details.
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/ProviderInfo'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
+      tags:
+        - Providers
+      summary: Get provider.
+      description: >-
+        Get provider.

+        Get detailed information about a specific provider.
+      parameters:
+        - name: provider_id
+          in: path
+          description: The ID of the provider to inspect.
+          required: true
+          schema:
+            type: string
+      deprecated: false
+  /v1/responses:
+    get:
+      responses:
+        '200':
+          description: A ListOpenAIResponseObject.
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/ListOpenAIResponseObject'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
+      tags:
+        - Agents
+      summary: List all responses.
+      description: List all responses.
+      parameters:
+        - name: after
+          in: query
+          description: The ID of the last response to return.
+          required: false
+          schema:
+            type: string
+        - name: limit
+          in: query
+          description: The number of responses to return.
+          required: false
+          schema:
+            type: integer
+        - name: model
+          in: query
+          description: The model to filter responses by.
+          required: false
+          schema:
+            type: string
+        - name: order
+          in: query
+          description: >-
+            The order to sort responses by when sorted by created_at ('asc' or 'desc').
+ required: false + schema: + $ref: '#/components/schemas/Order' + deprecated: false + post: + responses: + '200': + description: An OpenAIResponseObject. + content: + application/json: + schema: + $ref: '#/components/schemas/OpenAIResponseObject' + text/event-stream: + schema: + $ref: '#/components/schemas/OpenAIResponseObjectStream' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Agents + summary: Create a model response. + description: Create a model response. + parameters: [] + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/CreateOpenaiResponseRequest' + required: true + deprecated: false + x-llama-stack-extra-body-params: + - name: guardrails + schema: + type: array + items: + oneOf: + - type: string + - $ref: '#/components/schemas/ResponseGuardrailSpec' + description: >- + List of guardrails to apply during response generation. Guardrails provide + safety and content moderation. + required: false + /v1/responses/{response_id}: + get: + responses: + '200': + description: An OpenAIResponseObject. + content: + application/json: + schema: + $ref: '#/components/schemas/OpenAIResponseObject' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Agents + summary: Get a model response. + description: Get a model response. + parameters: + - name: response_id + in: path + description: >- + The ID of the OpenAI response to retrieve. + required: true + schema: + type: string + deprecated: false + delete: + responses: + '200': + description: An OpenAIDeleteResponseObject. + content: + application/json: + schema: + $ref: '#/components/schemas/OpenAIDeleteResponseObject' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Agents + summary: Delete a response. + description: Delete a response. + parameters: + - name: response_id + in: path + description: The ID of the OpenAI response to delete. + required: true + schema: + type: string + deprecated: false + /v1/responses/{response_id}/input_items: + get: + responses: + '200': + description: A ListOpenAIResponseInputItem. + content: + application/json: + schema: + $ref: '#/components/schemas/ListOpenAIResponseInputItem' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Agents + summary: List input items. + description: List input items. + parameters: + - name: response_id + in: path + description: >- + The ID of the response to retrieve input items for. + required: true + schema: + type: string + - name: after + in: query + description: >- + An item ID to list items after, used for pagination. + required: false + schema: + type: string + - name: before + in: query + description: >- + An item ID to list items before, used for pagination.
+ required: false + schema: + type: string + - name: include + in: query + description: >- + Additional fields to include in the response. + required: false + schema: + type: array + items: + type: string + - name: limit + in: query + description: >- + A limit on the number of objects to be returned. Limit can range between + 1 and 100, and the default is 20. + required: false + schema: + type: integer + - name: order + in: query + description: >- + The order to return the input items in. Default is desc. + required: false + schema: + $ref: '#/components/schemas/Order' + deprecated: false + /v1/safety/run-shield: + post: + responses: + '200': + description: A RunShieldResponse. + content: + application/json: + schema: + $ref: '#/components/schemas/RunShieldResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Safety + summary: Run shield. + description: >- + Run shield. + + Run a shield. + parameters: [] + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/RunShieldRequest' + required: true + deprecated: false + /v1/scoring-functions: + get: + responses: + '200': + description: A ListScoringFunctionsResponse. + content: + application/json: + schema: + $ref: '#/components/schemas/ListScoringFunctionsResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - ScoringFunctions + summary: List all scoring functions. + description: List all scoring functions. + parameters: [] + deprecated: false + post: + responses: + '200': + description: OK + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - ScoringFunctions + summary: Register a scoring function. + description: Register a scoring function. + parameters: [] + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/RegisterScoringFunctionRequest' + required: true + deprecated: false + /v1/scoring-functions/{scoring_fn_id}: + get: + responses: + '200': + description: A ScoringFn. + content: + application/json: + schema: + $ref: '#/components/schemas/ScoringFn' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - ScoringFunctions + summary: Get a scoring function by its ID. + description: Get a scoring function by its ID. + parameters: + - name: scoring_fn_id + in: path + description: The ID of the scoring function to get. + required: true + schema: + type: string + deprecated: false + delete: + responses: + '200': + description: OK + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - ScoringFunctions + summary: Unregister a scoring function. 
+ description: Unregister a scoring function. + parameters: + - name: scoring_fn_id + in: path + description: >- + The ID of the scoring function to unregister. + required: true + schema: + type: string + deprecated: false + /v1/scoring/score: + post: + responses: + '200': + description: >- + A ScoreResponse object containing rows and aggregated results. + content: + application/json: + schema: + $ref: '#/components/schemas/ScoreResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Scoring + summary: Score a list of rows. + description: Score a list of rows. + parameters: [] + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/ScoreRequest' + required: true + deprecated: false + /v1/scoring/score-batch: + post: + responses: + '200': + description: A ScoreBatchResponse. + content: + application/json: + schema: + $ref: '#/components/schemas/ScoreBatchResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Scoring + summary: Score a batch of rows. + description: Score a batch of rows. + parameters: [] + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/ScoreBatchRequest' + required: true + deprecated: false + /v1/shields: + get: + responses: + '200': + description: A ListShieldsResponse. + content: + application/json: + schema: + $ref: '#/components/schemas/ListShieldsResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Shields + summary: List all shields. + description: List all shields. + parameters: [] + deprecated: false + post: + responses: + '200': + description: A Shield. + content: + application/json: + schema: + $ref: '#/components/schemas/Shield' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Shields + summary: Register a shield. + description: Register a shield. + parameters: [] + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/RegisterShieldRequest' + required: true + deprecated: false + /v1/shields/{identifier}: + get: + responses: + '200': + description: A Shield. + content: + application/json: + schema: + $ref: '#/components/schemas/Shield' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Shields + summary: Get a shield by its identifier. + description: Get a shield by its identifier. + parameters: + - name: identifier + in: path + description: The identifier of the shield to get. 
+ required: true + schema: + type: string + deprecated: false + delete: + responses: + '200': + description: OK + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Shields + summary: Unregister a shield. + description: Unregister a shield. + parameters: + - name: identifier + in: path + description: >- + The identifier of the shield to unregister. + required: true + schema: + type: string + deprecated: false + /v1/synthetic-data-generation/generate: + post: + responses: + '200': + description: >- + Response containing filtered synthetic data samples and optional statistics + content: + application/json: + schema: + $ref: '#/components/schemas/SyntheticDataGenerationResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - SyntheticDataGeneration (Coming Soon) + summary: >- + Generate synthetic data based on input dialogs and apply filtering. + description: >- + Generate synthetic data based on input dialogs and apply filtering. + parameters: [] + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/SyntheticDataGenerateRequest' + required: true + deprecated: false + /v1/tool-runtime/invoke: + post: + responses: + '200': + description: A ToolInvocationResult. + content: + application/json: + schema: + $ref: '#/components/schemas/ToolInvocationResult' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - ToolRuntime + summary: Run a tool with the given arguments. + description: Run a tool with the given arguments. + parameters: [] + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/InvokeToolRequest' + required: true + deprecated: false + /v1/tool-runtime/list-tools: + get: + responses: + '200': + description: A ListToolDefsResponse. + content: + application/json: + schema: + $ref: '#/components/schemas/ListToolDefsResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - ToolRuntime + summary: List all tools in the runtime. + description: List all tools in the runtime. + parameters: + - name: tool_group_id + in: query + description: >- + The ID of the tool group to list tools for. + required: false + schema: + type: string + - name: mcp_endpoint + in: query + description: >- + The MCP endpoint to use for the tool group. + required: false + schema: + $ref: '#/components/schemas/URL' + deprecated: false + /v1/toolgroups: + get: + responses: + '200': + description: A ListToolGroupsResponse. 
+ content: + application/json: + schema: + $ref: '#/components/schemas/ListToolGroupsResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - ToolGroups + summary: List tool groups with optional provider. + description: List tool groups with optional provider. + parameters: [] + deprecated: false + post: + responses: + '200': + description: OK + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - ToolGroups + summary: Register a tool group. + description: Register a tool group. + parameters: [] + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/RegisterToolGroupRequest' + required: true + deprecated: false + /v1/toolgroups/{toolgroup_id}: + get: + responses: + '200': + description: A ToolGroup. + content: + application/json: + schema: + $ref: '#/components/schemas/ToolGroup' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - ToolGroups + summary: Get a tool group by its ID. + description: Get a tool group by its ID. + parameters: + - name: toolgroup_id + in: path + description: The ID of the tool group to get. + required: true + schema: + type: string + deprecated: false + delete: + responses: + '200': + description: OK + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - ToolGroups + summary: Unregister a tool group. + description: Unregister a tool group. + parameters: + - name: toolgroup_id + in: path + description: The ID of the tool group to unregister. + required: true + schema: + type: string + deprecated: false + /v1/tools: + get: + responses: + '200': + description: A ListToolDefsResponse. + content: + application/json: + schema: + $ref: '#/components/schemas/ListToolDefsResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - ToolGroups + summary: List tools with optional tool group. + description: List tools with optional tool group. + parameters: + - name: toolgroup_id + in: query + description: >- + The ID of the tool group to list tools for. + required: false + schema: + type: string + deprecated: false + /v1/tools/{tool_name}: + get: + responses: + '200': + description: A ToolDef. + content: + application/json: + schema: + $ref: '#/components/schemas/ToolDef' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - ToolGroups + summary: Get a tool by its name. + description: Get a tool by its name. 
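The ToolGroups and ToolRuntime endpoints above split discovery (list/get) from execution (invoke); the get-tool parameters continue just below. A hedged sketch in Python `requests` — the toolgroup ID and the `InvokeToolRequest` field names (`tool_name`, `kwargs`) are assumptions for illustration:

```python
import requests

BASE = "http://localhost:8321"

# Discovery: list tool groups, then the tools within one group using the
# documented toolgroup_id query parameter.
groups = requests.get(f"{BASE}/v1/toolgroups").json()
tools = requests.get(f"{BASE}/v1/tools", params={"toolgroup_id": "builtin::websearch"}).json()

# Execution: invoke a tool by name. The body field names here are assumptions;
# InvokeToolRequest defines the real shape.
result = requests.post(
    f"{BASE}/v1/tool-runtime/invoke",
    json={"tool_name": "web_search", "kwargs": {"query": "llama stack"}},
)
result.raise_for_status()
print(result.json())  # a ToolInvocationResult
```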
+ parameters: + - name: tool_name + in: path + description: The name of the tool to get. + required: true + schema: + type: string + deprecated: false + /v1/vector-io/insert: + post: + responses: + '200': + description: OK + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - VectorIO + summary: Insert chunks into a vector database. + description: Insert chunks into a vector database. + parameters: [] + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/InsertChunksRequest' + required: true + deprecated: false + /v1/vector-io/query: + post: + responses: + '200': + description: A QueryChunksResponse. + content: + application/json: + schema: + $ref: '#/components/schemas/QueryChunksResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - VectorIO + summary: Query chunks from a vector database. + description: Query chunks from a vector database. + parameters: [] + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/QueryChunksRequest' + required: true + deprecated: false + /v1/vector_stores: + get: + responses: + '200': + description: >- + A VectorStoreListResponse containing the list of vector stores. + content: + application/json: + schema: + $ref: '#/components/schemas/VectorStoreListResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - VectorIO + summary: Returns a list of vector stores. + description: Returns a list of vector stores. + parameters: + - name: limit + in: query + description: >- + A limit on the number of objects to be returned. Limit can range between + 1 and 100, and the default is 20. + required: false + schema: + type: integer + - name: order + in: query + description: >- + Sort order by the `created_at` timestamp of the objects. `asc` for ascending + order and `desc` for descending order. + required: false + schema: + type: string + - name: after + in: query + description: >- + A cursor for use in pagination. `after` is an object ID that defines your + place in the list. + required: false + schema: + type: string + - name: before + in: query + description: >- + A cursor for use in pagination. `before` is an object ID that defines + your place in the list. + required: false + schema: + type: string + deprecated: false + post: + responses: + '200': + description: >- + A VectorStoreObject representing the created vector store. + content: + application/json: + schema: + $ref: '#/components/schemas/VectorStoreObject' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - VectorIO + summary: Creates a vector store. + description: >- + Creates a vector store. + + Generate an OpenAI-compatible vector store with the given parameters. 
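The vector-store listing above uses OpenAI-style cursor pagination (`limit`, `order`, `after`, `before`), and creation is OpenAI-compatible; the create operation's request body follows just below. A minimal sketch with Python `requests`, where the `name` body field is an assumption standing in for the full OpenAICreateVectorStoreRequestWithExtraBody schema:

```python
import requests

BASE = "http://localhost:8321"

# Create a vector store ("name" is an assumed field for illustration).
store = requests.post(f"{BASE}/v1/vector_stores", json={"name": "docs"}).json()

# Page through stores newest-first using the documented query parameters;
# pass the last ID seen as "after" to fetch the next page.
page = requests.get(f"{BASE}/v1/vector_stores", params={"limit": 20, "order": "desc"}).json()
```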
+ parameters: [] + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/OpenAICreateVectorStoreRequestWithExtraBody' + required: true + deprecated: false + /v1/vector_stores/{vector_store_id}: + get: + responses: + '200': + description: >- + A VectorStoreObject representing the vector store. + content: + application/json: + schema: + $ref: '#/components/schemas/VectorStoreObject' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - VectorIO + summary: Retrieves a vector store. + description: Retrieves a vector store. + parameters: + - name: vector_store_id + in: path + description: The ID of the vector store to retrieve. + required: true + schema: + type: string + deprecated: false + post: + responses: + '200': + description: >- + A VectorStoreObject representing the updated vector store. + content: + application/json: + schema: + $ref: '#/components/schemas/VectorStoreObject' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - VectorIO + summary: Updates a vector store. + description: Updates a vector store. + parameters: + - name: vector_store_id + in: path + description: The ID of the vector store to update. + required: true + schema: + type: string + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/OpenaiUpdateVectorStoreRequest' + required: true + deprecated: false + delete: + responses: + '200': + description: >- + A VectorStoreDeleteResponse indicating the deletion status. + content: + application/json: + schema: + $ref: '#/components/schemas/VectorStoreDeleteResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - VectorIO + summary: Delete a vector store. + description: Delete a vector store. + parameters: + - name: vector_store_id + in: path + description: The ID of the vector store to delete. + required: true + schema: + type: string + deprecated: false + /v1/vector_stores/{vector_store_id}/file_batches: + post: + responses: + '200': + description: >- + A VectorStoreFileBatchObject representing the created file batch. + content: + application/json: + schema: + $ref: '#/components/schemas/VectorStoreFileBatchObject' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - VectorIO + summary: Create a vector store file batch. + description: >- + Create a vector store file batch. + + Generate an OpenAI-compatible vector store file batch for the given vector + store. + parameters: + - name: vector_store_id + in: path + description: >- + The ID of the vector store to create the file batch for. 
+ required: true + schema: + type: string + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/OpenAICreateVectorStoreFileBatchRequestWithExtraBody' + required: true + deprecated: false + /v1/vector_stores/{vector_store_id}/file_batches/{batch_id}: + get: + responses: + '200': + description: >- + A VectorStoreFileBatchObject representing the file batch. + content: + application/json: + schema: + $ref: '#/components/schemas/VectorStoreFileBatchObject' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - VectorIO + summary: Retrieve a vector store file batch. + description: Retrieve a vector store file batch. + parameters: + - name: batch_id + in: path + description: The ID of the file batch to retrieve. + required: true + schema: + type: string + - name: vector_store_id + in: path + description: >- + The ID of the vector store containing the file batch. + required: true + schema: + type: string + deprecated: false + /v1/vector_stores/{vector_store_id}/file_batches/{batch_id}/cancel: + post: + responses: + '200': + description: >- + A VectorStoreFileBatchObject representing the cancelled file batch. + content: + application/json: + schema: + $ref: '#/components/schemas/VectorStoreFileBatchObject' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - VectorIO + summary: Cancels a vector store file batch. + description: Cancels a vector store file batch. + parameters: + - name: batch_id + in: path + description: The ID of the file batch to cancel. + required: true + schema: + type: string + - name: vector_store_id + in: path + description: >- + The ID of the vector store containing the file batch. + required: true + schema: + type: string + deprecated: false + /v1/vector_stores/{vector_store_id}/file_batches/{batch_id}/files: + get: + responses: + '200': + description: >- + A VectorStoreFilesListInBatchResponse containing the list of files in + the batch. + content: + application/json: + schema: + $ref: '#/components/schemas/VectorStoreFilesListInBatchResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - VectorIO + summary: >- + Returns a list of vector store files in a batch. + description: >- + Returns a list of vector store files in a batch. + parameters: + - name: batch_id + in: path + description: >- + The ID of the file batch to list files from. + required: true + schema: + type: string + - name: vector_store_id + in: path + description: >- + The ID of the vector store containing the file batch. + required: true + schema: + type: string + - name: after + in: query + description: >- + A cursor for use in pagination. `after` is an object ID that defines your + place in the list. + required: false + schema: + type: string + - name: before + in: query + description: >- + A cursor for use in pagination. `before` is an object ID that defines + your place in the list. 
+ required: false + schema: + type: string + - name: filter + in: query + description: >- + Filter by file status. One of in_progress, completed, failed, cancelled. + required: false + schema: + type: string + - name: limit + in: query + description: >- + A limit on the number of objects to be returned. Limit can range between + 1 and 100, and the default is 20. + required: false + schema: + type: integer + - name: order + in: query + description: >- + Sort order by the `created_at` timestamp of the objects. `asc` for ascending + order and `desc` for descending order. + required: false + schema: + type: string + deprecated: false + /v1/vector_stores/{vector_store_id}/files: + get: + responses: + '200': + description: >- + A VectorStoreListFilesResponse containing the list of files. + content: + application/json: + schema: + $ref: '#/components/schemas/VectorStoreListFilesResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - VectorIO + summary: List files in a vector store. + description: List files in a vector store. + parameters: + - name: vector_store_id + in: path + description: >- + The ID of the vector store to list files from. + required: true + schema: + type: string + - name: limit + in: query + description: >- + (Optional) A limit on the number of objects to be returned. Limit can + range between 1 and 100, and the default is 20. + required: false + schema: + type: integer + - name: order + in: query + description: >- + (Optional) Sort order by the `created_at` timestamp of the objects. `asc` + for ascending order and `desc` for descending order. + required: false + schema: + type: string + - name: after + in: query + description: >- + (Optional) A cursor for use in pagination. `after` is an object ID that + defines your place in the list. + required: false + schema: + type: string + - name: before + in: query + description: >- + (Optional) A cursor for use in pagination. `before` is an object ID that + defines your place in the list. + required: false + schema: + type: string + - name: filter + in: query + description: >- + (Optional) Filter by file status to only return files with the specified + status. + required: false + schema: + $ref: '#/components/schemas/VectorStoreFileStatus' + deprecated: false + post: + responses: + '200': + description: >- + A VectorStoreFileObject representing the attached file. + content: + application/json: + schema: + $ref: '#/components/schemas/VectorStoreFileObject' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - VectorIO + summary: Attach a file to a vector store. + description: Attach a file to a vector store. + parameters: + - name: vector_store_id + in: path + description: >- + The ID of the vector store to attach the file to. + required: true + schema: + type: string + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/OpenaiAttachFileToVectorStoreRequest' + required: true + deprecated: false + /v1/vector_stores/{vector_store_id}/files/{file_id}: + get: + responses: + '200': + description: >- + A VectorStoreFileObject representing the file. 
+ content: + application/json: + schema: + $ref: '#/components/schemas/VectorStoreFileObject' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - VectorIO + summary: Retrieves a vector store file. + description: Retrieves a vector store file. + parameters: + - name: vector_store_id + in: path + description: >- + The ID of the vector store containing the file to retrieve. + required: true + schema: + type: string + - name: file_id + in: path + description: The ID of the file to retrieve. + required: true + schema: + type: string + deprecated: false + post: + responses: + '200': + description: >- + A VectorStoreFileObject representing the updated file. + content: + application/json: + schema: + $ref: '#/components/schemas/VectorStoreFileObject' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - VectorIO + summary: Updates a vector store file. + description: Updates a vector store file. + parameters: + - name: vector_store_id + in: path + description: >- + The ID of the vector store containing the file to update. + required: true + schema: + type: string + - name: file_id + in: path + description: The ID of the file to update. + required: true + schema: + type: string + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/OpenaiUpdateVectorStoreFileRequest' + required: true + deprecated: false + delete: + responses: + '200': + description: >- + A VectorStoreFileDeleteResponse indicating the deletion status. + content: + application/json: + schema: + $ref: '#/components/schemas/VectorStoreFileDeleteResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - VectorIO + summary: Delete a vector store file. + description: Delete a vector store file. + parameters: + - name: vector_store_id + in: path + description: >- + The ID of the vector store containing the file to delete. + required: true + schema: + type: string + - name: file_id + in: path + description: The ID of the file to delete. + required: true + schema: + type: string + deprecated: false + /v1/vector_stores/{vector_store_id}/files/{file_id}/content: + get: + responses: + '200': + description: >- + A list of InterleavedContent representing the file contents. + content: + application/json: + schema: + $ref: '#/components/schemas/VectorStoreFileContentsResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - VectorIO + summary: >- + Retrieves the contents of a vector store file. + description: >- + Retrieves the contents of a vector store file. + parameters: + - name: vector_store_id + in: path + description: >- + The ID of the vector store containing the file to retrieve. + required: true + schema: + type: string + - name: file_id + in: path + description: The ID of the file to retrieve. 
+ required: true + schema: + type: string + deprecated: false + /v1/vector_stores/{vector_store_id}/search: + post: + responses: + '200': + description: >- + A VectorStoreSearchResponse containing the search results. + content: + application/json: + schema: + $ref: '#/components/schemas/VectorStoreSearchResponsePage' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - VectorIO + summary: Search for chunks in a vector store. + description: >- + Search for chunks in a vector store. + + Searches a vector store for relevant chunks based on a query and optional + file attribute filters. + parameters: + - name: vector_store_id + in: path + description: The ID of the vector store to search. + required: true + schema: + type: string + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/OpenaiSearchVectorStoreRequest' + required: true + deprecated: false + /v1/version: + get: + responses: + '200': + description: >- + Version information containing the service version number. + content: + application/json: + schema: + $ref: '#/components/schemas/VersionInfo' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Inspect + summary: Get version. + description: >- + Get version. + + Get the version of the service. + parameters: [] + deprecated: false + /v1beta/datasetio/append-rows/{dataset_id}: + post: + responses: + '200': + description: OK + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - DatasetIO + summary: Append rows to a dataset. + description: Append rows to a dataset. + parameters: + - name: dataset_id + in: path + description: >- + The ID of the dataset to append the rows to. + required: true + schema: + type: string + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/AppendRowsRequest' + required: true + deprecated: false + /v1beta/datasetio/iterrows/{dataset_id}: + get: + responses: + '200': + description: A PaginatedResponse. + content: + application/json: + schema: + $ref: '#/components/schemas/PaginatedResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - DatasetIO + summary: >- + Get a paginated list of rows from a dataset. + description: >- + Get a paginated list of rows from a dataset. + + Uses offset-based pagination where: + + - start_index: The starting index (0-based). If None, starts from beginning. + + - limit: Number of items to return. If None or -1, returns all items. + + + The response includes: + + - data: List of items for the current page. + + - has_more: Whether there are more items available after this set. + parameters: + - name: dataset_id + in: path + description: >- + The ID of the dataset to get the rows from. 
+ required: true + schema: + type: string + - name: start_index + in: query + description: >- + Index into dataset for the first row to get. Get all rows if None. + required: false + schema: + type: integer + - name: limit + in: query + description: The number of rows to get. + required: false + schema: + type: integer + deprecated: false + /v1beta/datasets: + get: + responses: + '200': + description: A ListDatasetsResponse. + content: + application/json: + schema: + $ref: '#/components/schemas/ListDatasetsResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Datasets + summary: List all datasets. + description: List all datasets. + parameters: [] + deprecated: false + post: + responses: + '200': + description: A Dataset. + content: + application/json: + schema: + $ref: '#/components/schemas/Dataset' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Datasets + summary: Register a new dataset. + description: Register a new dataset. + parameters: [] + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/RegisterDatasetRequest' + required: true + deprecated: false + /v1beta/datasets/{dataset_id}: + get: + responses: + '200': + description: A Dataset. + content: + application/json: + schema: + $ref: '#/components/schemas/Dataset' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Datasets + summary: Get a dataset by its ID. + description: Get a dataset by its ID. + parameters: + - name: dataset_id + in: path + description: The ID of the dataset to get. + required: true + schema: + type: string + deprecated: false + delete: + responses: + '200': + description: OK + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Datasets + summary: Unregister a dataset by its ID. + description: Unregister a dataset by its ID. + parameters: + - name: dataset_id + in: path + description: The ID of the dataset to unregister. + required: true + schema: + type: string + deprecated: false + /v1alpha/agents: + get: + responses: + '200': + description: A PaginatedResponse. + content: + application/json: + schema: + $ref: '#/components/schemas/PaginatedResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Agents + summary: List all agents. + description: List all agents. + parameters: + - name: start_index + in: query + description: The index to start the pagination from. + required: false + schema: + type: integer + - name: limit + in: query + description: The number of agents to return. 
+ required: false + schema: + type: integer + deprecated: false + post: + responses: + '200': + description: >- + An AgentCreateResponse with the agent ID. + content: + application/json: + schema: + $ref: '#/components/schemas/AgentCreateResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Agents + summary: >- + Create an agent with the given configuration. + description: >- + Create an agent with the given configuration. + parameters: [] + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/CreateAgentRequest' + required: true + deprecated: false + /v1alpha/agents/{agent_id}: + get: + responses: + '200': + description: An Agent object describing the agent. + content: + application/json: + schema: + $ref: '#/components/schemas/Agent' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Agents + summary: Describe an agent by its ID. + description: Describe an agent by its ID. + parameters: + - name: agent_id + in: path + description: ID of the agent. + required: true + schema: + type: string + deprecated: false + delete: + responses: + '200': + description: OK + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Agents + summary: >- + Delete an agent by its ID and its associated sessions and turns. + description: >- + Delete an agent by its ID and its associated sessions and turns. + parameters: + - name: agent_id + in: path + description: The ID of the agent to delete. + required: true + schema: + type: string + deprecated: false + /v1alpha/agents/{agent_id}/session: + post: + responses: + '200': + description: An AgentSessionCreateResponse. + content: + application/json: + schema: + $ref: '#/components/schemas/AgentSessionCreateResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Agents + summary: Create a new session for an agent. + description: Create a new session for an agent. + parameters: + - name: agent_id + in: path + description: >- + The ID of the agent to create the session for. + required: true + schema: + type: string + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/CreateAgentSessionRequest' + required: true + deprecated: false + /v1alpha/agents/{agent_id}/session/{session_id}: + get: + responses: + '200': + description: A Session. + content: + application/json: + schema: + $ref: '#/components/schemas/Session' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Agents + summary: Retrieve an agent session by its ID. + description: Retrieve an agent session by its ID.
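Taken together, the v1alpha Agents endpoints above describe a create-agent → create-session flow, with deletion cascading from agent to sessions to turns (the session-retrieval parameters continue just below). A speculative sketch in Python `requests` — every body and response field name here (`agent_config`, `agent_id`, `session_name`, `session_id`) is an assumption; the real shapes come from CreateAgentRequest, AgentCreateResponse, and CreateAgentSessionRequest:

```python
import requests

BASE = "http://localhost:8321"

# Create an agent ("agent_config" and its contents are assumed field names).
agent = requests.post(
    f"{BASE}/v1alpha/agents",
    json={"agent_config": {"model": "llama3.2:3b", "instructions": "Be brief."}},
)
agent.raise_for_status()
agent_id = agent.json()["agent_id"]  # assumed field on AgentCreateResponse

# Open a session for the agent ("session_name" is an assumed field name).
session = requests.post(
    f"{BASE}/v1alpha/agents/{agent_id}/session",
    json={"session_name": "demo"},
)
session_id = session.json()["session_id"]  # assumed field name

# Deleting the agent also deletes its sessions and turns, per the spec above.
requests.delete(f"{BASE}/v1alpha/agents/{agent_id}").raise_for_status()
```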
+ parameters: + - name: session_id + in: path + description: The ID of the session to get. + required: true + schema: + type: string + - name: agent_id + in: path + description: >- + The ID of the agent to get the session for. + required: true + schema: + type: string + - name: turn_ids + in: query + description: >- + (Optional) List of turn IDs to filter the session by. + required: false + schema: + type: array + items: + type: string + deprecated: false + delete: + responses: + '200': + description: OK + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Agents + summary: >- + Delete an agent session by its ID and its associated turns. + description: >- + Delete an agent session by its ID and its associated turns. + parameters: + - name: session_id + in: path + description: The ID of the session to delete. + required: true + schema: + type: string + - name: agent_id + in: path + description: >- + The ID of the agent to delete the session for. + required: true + schema: + type: string + deprecated: false + /v1alpha/agents/{agent_id}/session/{session_id}/turn: + post: + responses: + '200': + description: >- + If stream=False, returns a Turn object. If stream=True, returns an SSE + event stream of AgentTurnResponseStreamChunk. + content: + application/json: + schema: + $ref: '#/components/schemas/Turn' + text/event-stream: + schema: + $ref: '#/components/schemas/AgentTurnResponseStreamChunk' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Agents + summary: Create a new turn for an agent. + description: Create a new turn for an agent. + parameters: + - name: agent_id + in: path + description: >- + The ID of the agent to create the turn for. + required: true + schema: + type: string + - name: session_id + in: path + description: >- + The ID of the session to create the turn for. + required: true + schema: + type: string + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/CreateAgentTurnRequest' + required: true + deprecated: false + /v1alpha/agents/{agent_id}/session/{session_id}/turn/{turn_id}: + get: + responses: + '200': + description: A Turn. + content: + application/json: + schema: + $ref: '#/components/schemas/Turn' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Agents + summary: Retrieve an agent turn by its ID. + description: Retrieve an agent turn by its ID. + parameters: + - name: agent_id + in: path + description: The ID of the agent to get the turn for. + required: true + schema: + type: string + - name: session_id + in: path + description: >- + The ID of the session to get the turn for. + required: true + schema: + type: string + - name: turn_id + in: path + description: The ID of the turn to get. 
+ required: true + schema: + type: string + deprecated: false + /v1alpha/agents/{agent_id}/session/{session_id}/turn/{turn_id}/resume: + post: + responses: + '200': + description: >- + A Turn object if stream is False, otherwise an AsyncIterator of AgentTurnResponseStreamChunk + objects. + content: + application/json: + schema: + $ref: '#/components/schemas/Turn' + text/event-stream: + schema: + $ref: '#/components/schemas/AgentTurnResponseStreamChunk' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Agents + summary: >- + Resume an agent turn with executed tool call responses. + description: >- + Resume an agent turn with executed tool call responses. + + When a Turn has the status `awaiting_input` due to pending input from client + side tool calls, this endpoint can be used to submit the outputs from the + tool calls once they are ready. + parameters: + - name: agent_id + in: path + description: The ID of the agent to resume. + required: true + schema: + type: string + - name: session_id + in: path + description: The ID of the session to resume. + required: true + schema: + type: string + - name: turn_id + in: path + description: The ID of the turn to resume. + required: true + schema: + type: string + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/ResumeAgentTurnRequest' + required: true + deprecated: false + /v1alpha/agents/{agent_id}/session/{session_id}/turn/{turn_id}/step/{step_id}: + get: + responses: + '200': + description: An AgentStepResponse. + content: + application/json: + schema: + $ref: '#/components/schemas/AgentStepResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Agents + summary: Retrieve an agent step by its ID. + description: Retrieve an agent step by its ID. + parameters: + - name: agent_id + in: path + description: The ID of the agent to get the step for. + required: true + schema: + type: string + - name: session_id + in: path + description: >- + The ID of the session to get the step for. + required: true + schema: + type: string + - name: turn_id + in: path + description: The ID of the turn to get the step for. + required: true + schema: + type: string + - name: step_id + in: path + description: The ID of the step to get. + required: true + schema: + type: string + deprecated: false + /v1alpha/agents/{agent_id}/sessions: + get: + responses: + '200': + description: A PaginatedResponse. + content: + application/json: + schema: + $ref: '#/components/schemas/PaginatedResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Agents + summary: List all session(s) of a given agent. + description: List all session(s) of a given agent. + parameters: + - name: agent_id + in: path + description: >- + The ID of the agent to list sessions for. + required: true + schema: + type: string + - name: start_index + in: query + description: The index to start the pagination from. 
+ required: false + schema: + type: integer + - name: limit + in: query + description: The number of sessions to return. + required: false + schema: + type: integer + deprecated: false + /v1alpha/eval/benchmarks: + get: + responses: + '200': + description: A ListBenchmarksResponse. + content: + application/json: + schema: + $ref: '#/components/schemas/ListBenchmarksResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Benchmarks + summary: List all benchmarks. + description: List all benchmarks. + parameters: [] + deprecated: false + post: + responses: + '200': + description: OK + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Benchmarks + summary: Register a benchmark. + description: Register a benchmark. + parameters: [] + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/RegisterBenchmarkRequest' + required: true + deprecated: false + /v1alpha/eval/benchmarks/{benchmark_id}: + get: + responses: + '200': + description: A Benchmark. + content: + application/json: + schema: + $ref: '#/components/schemas/Benchmark' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Benchmarks + summary: Get a benchmark by its ID. + description: Get a benchmark by its ID. + parameters: + - name: benchmark_id + in: path + description: The ID of the benchmark to get. + required: true + schema: + type: string + deprecated: false + delete: + responses: + '200': + description: OK + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Benchmarks + summary: Unregister a benchmark. + description: Unregister a benchmark. + parameters: + - name: benchmark_id + in: path + description: The ID of the benchmark to unregister. + required: true + schema: + type: string + deprecated: false + /v1alpha/eval/benchmarks/{benchmark_id}/evaluations: + post: + responses: + '200': + description: >- + EvaluateResponse object containing generations and scores. + content: + application/json: + schema: + $ref: '#/components/schemas/EvaluateResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Eval + summary: Evaluate a list of rows on a benchmark. + description: Evaluate a list of rows on a benchmark. + parameters: + - name: benchmark_id + in: path + description: >- + The ID of the benchmark to run the evaluation on. 
+ required: true + schema: + type: string + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/EvaluateRowsRequest' + required: true + deprecated: false + /v1alpha/eval/benchmarks/{benchmark_id}/jobs: + post: + responses: + '200': + description: >- + The job that was created to run the evaluation. + content: + application/json: + schema: + $ref: '#/components/schemas/Job' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Eval + summary: Run an evaluation on a benchmark. + description: Run an evaluation on a benchmark. + parameters: + - name: benchmark_id + in: path + description: >- + The ID of the benchmark to run the evaluation on. + required: true + schema: + type: string + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/RunEvalRequest' + required: true + deprecated: false + /v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}: + get: + responses: + '200': + description: The status of the evaluation job. + content: + application/json: + schema: + $ref: '#/components/schemas/Job' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Eval + summary: Get the status of a job. + description: Get the status of a job. + parameters: + - name: benchmark_id + in: path + description: >- + The ID of the benchmark to run the evaluation on. + required: true + schema: + type: string + - name: job_id + in: path + description: The ID of the job to get the status of. + required: true + schema: + type: string + deprecated: false + delete: + responses: + '200': + description: OK + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Eval + summary: Cancel a job. + description: Cancel a job. + parameters: + - name: benchmark_id + in: path + description: >- + The ID of the benchmark to run the evaluation on. + required: true + schema: + type: string + - name: job_id + in: path + description: The ID of the job to cancel. + required: true + schema: + type: string + deprecated: false + /v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result: + get: + responses: + '200': + description: The result of the job. + content: + application/json: + schema: + $ref: '#/components/schemas/EvaluateResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Eval + summary: Get the result of a job. + description: Get the result of a job. + parameters: + - name: benchmark_id + in: path + description: >- + The ID of the benchmark to run the evaluation on. + required: true + schema: + type: string + - name: job_id + in: path + description: The ID of the job to get the result of. 
+ required: true + schema: + type: string + deprecated: false + /v1alpha/inference/rerank: + post: + responses: + '200': + description: >- + RerankResponse with indices sorted by relevance score (descending). + content: + application/json: + schema: + $ref: '#/components/schemas/RerankResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Inference + summary: >- + Rerank a list of documents based on their relevance to a query. + description: >- + Rerank a list of documents based on their relevance to a query. + parameters: [] + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/RerankRequest' + required: true + deprecated: false + /v1alpha/post-training/job/artifacts: + get: + responses: + '200': + description: A PostTrainingJobArtifactsResponse. + content: + application/json: + schema: + $ref: '#/components/schemas/PostTrainingJobArtifactsResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - PostTraining (Coming Soon) + summary: Get the artifacts of a training job. + description: Get the artifacts of a training job. + parameters: + - name: job_uuid + in: query + description: >- + The UUID of the job to get the artifacts of. + required: true + schema: + type: string + deprecated: false + /v1alpha/post-training/job/cancel: + post: + responses: + '200': + description: OK + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - PostTraining (Coming Soon) + summary: Cancel a training job. + description: Cancel a training job. + parameters: [] + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/CancelTrainingJobRequest' + required: true + deprecated: false + /v1alpha/post-training/job/status: + get: + responses: + '200': + description: A PostTrainingJobStatusResponse. + content: + application/json: + schema: + $ref: '#/components/schemas/PostTrainingJobStatusResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - PostTraining (Coming Soon) + summary: Get the status of a training job. + description: Get the status of a training job. + parameters: + - name: job_uuid + in: query + description: >- + The UUID of the job to get the status of. + required: true + schema: + type: string + deprecated: false + /v1alpha/post-training/jobs: + get: + responses: + '200': + description: A ListPostTrainingJobsResponse. 
+ content: + application/json: + schema: + $ref: '#/components/schemas/ListPostTrainingJobsResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - PostTraining (Coming Soon) + summary: Get all training jobs. + description: Get all training jobs. + parameters: [] + deprecated: false + /v1alpha/post-training/preference-optimize: + post: + responses: + '200': + description: A PostTrainingJob. + content: + application/json: + schema: + $ref: '#/components/schemas/PostTrainingJob' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - PostTraining (Coming Soon) + summary: Run preference optimization of a model. + description: Run preference optimization of a model. + parameters: [] + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/PreferenceOptimizeRequest' + required: true + deprecated: false + /v1alpha/post-training/supervised-fine-tune: + post: + responses: + '200': + description: A PostTrainingJob. + content: + application/json: + schema: + $ref: '#/components/schemas/PostTrainingJob' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - PostTraining (Coming Soon) + summary: Run supervised fine-tuning of a model. + description: Run supervised fine-tuning of a model. + parameters: [] + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/SupervisedFineTuneRequest' + required: true + deprecated: false +jsonSchemaDialect: >- + https://json-schema.org/draft/2020-12/schema +components: + schemas: + Error: + type: object + properties: + status: + type: integer + description: HTTP status code + title: + type: string + description: >- + Error title, a short summary of the error which is invariant for an error + type + detail: + type: string + description: >- + Error detail, a longer human-readable description of the error + instance: + type: string + description: >- + (Optional) A URL which can be used to retrieve more information about + the specific occurrence of the error + additionalProperties: false + required: + - status + - title + - detail + title: Error + description: >- + Error response from the API. Roughly follows RFC 7807. + Order: + type: string + enum: + - asc + - desc + title: Order + description: Sort order for paginated responses. 
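Since every operation above routes its non-2xx outcomes through this shared Error schema (BadRequest400, TooManyRequests429, InternalServerError500, DefaultError), a client can handle failures uniformly. A minimal sketch assuming a requests-based client; the helper name is illustrative:

```python
import requests

def raise_for_stack_error(resp: requests.Response) -> None:
    """Raise if `resp` carries an Error payload (roughly RFC 7807)."""
    if resp.ok:
        return
    err = resp.json()
    # 'status', 'title', and 'detail' are required; 'instance' is optional.
    message = f"{err['status']} {err['title']}: {err['detail']}"
    if err.get("instance"):
        message += f" (see {err['instance']})"
    raise RuntimeError(message)
```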
+ ListOpenAIChatCompletionResponse: + type: object + properties: + data: + type: array + items: + type: object + properties: + id: + type: string + description: The ID of the chat completion + choices: + type: array + items: + $ref: '#/components/schemas/OpenAIChoice' + description: List of choices + object: + type: string + const: chat.completion + default: chat.completion + description: >- + The object type, which will be "chat.completion" + created: + type: integer + description: >- + The Unix timestamp in seconds when the chat completion was created + model: + type: string + description: >- + The model that was used to generate the chat completion + usage: + $ref: '#/components/schemas/OpenAIChatCompletionUsage' + description: >- + Token usage information for the completion + input_messages: + type: array + items: + $ref: '#/components/schemas/OpenAIMessageParam' + additionalProperties: false + required: + - id + - choices + - object + - created + - model + - input_messages + title: OpenAICompletionWithInputMessages + description: >- + List of chat completion objects with their input messages + has_more: + type: boolean + description: >- + Whether there are more completions available beyond this list + first_id: + type: string + description: ID of the first completion in this list + last_id: + type: string + description: ID of the last completion in this list + object: + type: string + const: list + default: list + description: >- + Must be "list" to identify this as a list response + additionalProperties: false + required: + - data + - has_more + - first_id + - last_id + - object + title: ListOpenAIChatCompletionResponse + description: >- + Response from listing OpenAI-compatible chat completions. + OpenAIAssistantMessageParam: + type: object + properties: + role: + type: string + const: assistant + default: assistant + description: >- + Must be "assistant" to identify this as the model's response + content: + oneOf: + - type: string + - type: array + items: + $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + description: The content of the model's response + name: + type: string + description: >- + (Optional) The name of the assistant message participant. + tool_calls: + type: array + items: + $ref: '#/components/schemas/OpenAIChatCompletionToolCall' + description: >- + List of tool calls. Each tool call is an OpenAIChatCompletionToolCall + object. + additionalProperties: false + required: + - role + title: OpenAIAssistantMessageParam + description: >- + A message containing the model's (assistant) response in an OpenAI-compatible + chat completion request. + "OpenAIChatCompletionContentPartImageParam": + type: object + properties: + type: + type: string + const: image_url + default: image_url + description: >- + Must be "image_url" to identify this as image content + image_url: + $ref: '#/components/schemas/OpenAIImageURL' + description: >- + Image URL specification and processing details + additionalProperties: false + required: + - type + - image_url + title: >- + OpenAIChatCompletionContentPartImageParam + description: >- + Image content part for OpenAI-compatible chat completion messages. 
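ListOpenAIChatCompletionResponse follows the cursor-style list convention used throughout this spec (`data` plus `has_more`/`first_id`/`last_id`). A sketch of walking such a list; `fetch_page` is a hypothetical helper, since the list endpoint itself is outside this excerpt:

```python
def iter_chat_completions(fetch_page):
    """Walk a paginated list response using has_more/last_id.

    `fetch_page(after)` is a hypothetical helper that GETs the list
    endpoint and returns a parsed ListOpenAIChatCompletionResponse dict.
    """
    after = None
    while True:
        page = fetch_page(after)
        yield from page["data"]
        if not page["has_more"]:
            break
        after = page["last_id"]  # resume after the last item in this page
```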
+ OpenAIChatCompletionContentPartParam: + oneOf: + - $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + - $ref: '#/components/schemas/OpenAIChatCompletionContentPartImageParam' + - $ref: '#/components/schemas/OpenAIFile' + discriminator: + propertyName: type + mapping: + text: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + image_url: '#/components/schemas/OpenAIChatCompletionContentPartImageParam' + file: '#/components/schemas/OpenAIFile' + OpenAIChatCompletionContentPartTextParam: + type: object + properties: + type: + type: string + const: text + default: text + description: >- + Must be "text" to identify this as text content + text: + type: string + description: The text content of the message + additionalProperties: false + required: + - type + - text + title: OpenAIChatCompletionContentPartTextParam + description: >- + Text content part for OpenAI-compatible chat completion messages. + OpenAIChatCompletionToolCall: + type: object + properties: + index: + type: integer + description: >- + (Optional) Index of the tool call in the list + id: + type: string + description: >- + (Optional) Unique identifier for the tool call + type: + type: string + const: function + default: function + description: >- + Must be "function" to identify this as a function call + function: + $ref: '#/components/schemas/OpenAIChatCompletionToolCallFunction' + description: (Optional) Function call details + additionalProperties: false + required: + - type + title: OpenAIChatCompletionToolCall + description: >- + Tool call specification for OpenAI-compatible chat completion responses. + OpenAIChatCompletionToolCallFunction: + type: object + properties: + name: + type: string + description: (Optional) Name of the function to call + arguments: + type: string + description: >- + (Optional) Arguments to pass to the function as a JSON string + additionalProperties: false + title: OpenAIChatCompletionToolCallFunction + description: >- + Function call details for OpenAI-compatible tool calls. + OpenAIChatCompletionUsage: + type: object + properties: + prompt_tokens: + type: integer + description: Number of tokens in the prompt + completion_tokens: + type: integer + description: Number of tokens in the completion + total_tokens: + type: integer + description: Total tokens used (prompt + completion) + prompt_tokens_details: + type: object + properties: + cached_tokens: + type: integer + description: Number of tokens retrieved from cache + additionalProperties: false + title: >- + OpenAIChatCompletionUsagePromptTokensDetails + description: >- + Token details for prompt tokens in OpenAI chat completion usage. + completion_tokens_details: + type: object + properties: + reasoning_tokens: + type: integer + description: >- + Number of tokens used for reasoning (o1/o3 models) + additionalProperties: false + title: >- + OpenAIChatCompletionUsageCompletionTokensDetails + description: >- + Token details for output tokens in OpenAI chat completion usage. + additionalProperties: false + required: + - prompt_tokens + - completion_tokens + - total_tokens + title: OpenAIChatCompletionUsage + description: >- + Usage information for OpenAI chat completion. 
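The nested `prompt_tokens_details` and `completion_tokens_details` objects are optional, so consumers should treat them as absent by default. A small sketch of reading an OpenAIChatCompletionUsage payload defensively:

```python
def summarize_usage(usage: dict) -> str:
    """Summarize an OpenAIChatCompletionUsage payload."""
    cached = (usage.get("prompt_tokens_details") or {}).get("cached_tokens", 0)
    reasoning = (usage.get("completion_tokens_details") or {}).get("reasoning_tokens", 0)
    return (
        f"prompt={usage['prompt_tokens']} (cached={cached}) "
        f"completion={usage['completion_tokens']} (reasoning={reasoning}) "
        f"total={usage['total_tokens']}"
    )
```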
+    OpenAIChoice:
+      type: object
+      properties:
+        message:
+          oneOf:
+            - $ref: '#/components/schemas/OpenAIUserMessageParam'
+            - $ref: '#/components/schemas/OpenAISystemMessageParam'
+            - $ref: '#/components/schemas/OpenAIAssistantMessageParam'
+            - $ref: '#/components/schemas/OpenAIToolMessageParam'
+            - $ref: '#/components/schemas/OpenAIDeveloperMessageParam'
+          discriminator:
+            propertyName: role
+            mapping:
+              user: '#/components/schemas/OpenAIUserMessageParam'
+              system: '#/components/schemas/OpenAISystemMessageParam'
+              assistant: '#/components/schemas/OpenAIAssistantMessageParam'
+              tool: '#/components/schemas/OpenAIToolMessageParam'
+              developer: '#/components/schemas/OpenAIDeveloperMessageParam'
+          description: The message from the model
+        finish_reason:
+          type: string
+          description: The reason the model stopped generating
+        index:
+          type: integer
+          description: The index of the choice
+        logprobs:
+          $ref: '#/components/schemas/OpenAIChoiceLogprobs'
+          description: >-
+            (Optional) The log probabilities for the tokens in the message
+      additionalProperties: false
+      required:
+        - message
+        - finish_reason
+        - index
+      title: OpenAIChoice
+      description: >-
+        A choice from an OpenAI-compatible chat completion response.
+    OpenAIChoiceLogprobs:
+      type: object
+      properties:
+        content:
+          type: array
+          items:
+            $ref: '#/components/schemas/OpenAITokenLogProb'
+          description: >-
+            (Optional) The log probabilities for the tokens in the message
+        refusal:
+          type: array
+          items:
+            $ref: '#/components/schemas/OpenAITokenLogProb'
+          description: >-
+            (Optional) The log probabilities for the tokens in the refusal
+      additionalProperties: false
+      title: OpenAIChoiceLogprobs
+      description: >-
+        The log probabilities for the tokens in the message from an OpenAI-compatible
+        chat completion response.
+    OpenAIDeveloperMessageParam:
+      type: object
+      properties:
+        role:
+          type: string
+          const: developer
+          default: developer
+          description: >-
+            Must be "developer" to identify this as a developer message
+        content:
+          oneOf:
+            - type: string
+            - type: array
+              items:
+                $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam'
+          description: The content of the developer message
+        name:
+          type: string
+          description: >-
+            (Optional) The name of the developer message participant.
+      additionalProperties: false
+      required:
+        - role
+        - content
+      title: OpenAIDeveloperMessageParam
+      description: >-
+        A message from the developer in an OpenAI-compatible chat completion request.
+    OpenAIFile:
+      type: object
+      properties:
+        type:
+          type: string
+          const: file
+          default: file
+        file:
+          $ref: '#/components/schemas/OpenAIFileFile'
+      additionalProperties: false
+      required:
+        - type
+        - file
+      title: OpenAIFile
+    OpenAIFileFile:
+      type: object
+      properties:
+        file_data:
+          type: string
+        file_id:
+          type: string
+        filename:
+          type: string
+      additionalProperties: false
+      title: OpenAIFileFile
+    OpenAIImageURL:
+      type: object
+      properties:
+        url:
+          type: string
+          description: >-
+            URL of the image to include in the message
+        detail:
+          type: string
+          description: >-
+            (Optional) Level of detail for image processing. Can be "low", "high",
+            or "auto"
+      additionalProperties: false
+      required:
+        - url
+      title: OpenAIImageURL
+      description: >-
+        Image URL specification for OpenAI-compatible chat completion messages.
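Putting the content-part schemas together: a user message's array-form content mixes text parts with image_url parts, whose `image_url` field is an OpenAIImageURL object. A sketch of such a payload; the URL is a placeholder:

```python
# A user message content array mixing the two part types defined above.
content_parts = [
    {"type": "text", "text": "What is shown in this image?"},
    {
        "type": "image_url",
        "image_url": {  # OpenAIImageURL: 'url' required, 'detail' optional
            "url": "https://example.com/photo.png",
            "detail": "low",
        },
    },
]
```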
+ OpenAIMessageParam: + oneOf: + - $ref: '#/components/schemas/OpenAIUserMessageParam' + - $ref: '#/components/schemas/OpenAISystemMessageParam' + - $ref: '#/components/schemas/OpenAIAssistantMessageParam' + - $ref: '#/components/schemas/OpenAIToolMessageParam' + - $ref: '#/components/schemas/OpenAIDeveloperMessageParam' + discriminator: + propertyName: role + mapping: + user: '#/components/schemas/OpenAIUserMessageParam' + system: '#/components/schemas/OpenAISystemMessageParam' + assistant: '#/components/schemas/OpenAIAssistantMessageParam' + tool: '#/components/schemas/OpenAIToolMessageParam' + developer: '#/components/schemas/OpenAIDeveloperMessageParam' + OpenAISystemMessageParam: + type: object + properties: + role: + type: string + const: system + default: system + description: >- + Must be "system" to identify this as a system message + content: + oneOf: + - type: string + - type: array + items: + $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + description: >- + The content of the "system prompt". If multiple system messages are provided, + they are concatenated. The underlying Llama Stack code may also add other + system messages (for example, for formatting tool definitions). + name: + type: string + description: >- + (Optional) The name of the system message participant. + additionalProperties: false + required: + - role + - content + title: OpenAISystemMessageParam + description: >- + A system message providing instructions or context to the model. + OpenAITokenLogProb: + type: object + properties: + token: + type: string + bytes: + type: array + items: + type: integer + logprob: + type: number + top_logprobs: + type: array + items: + $ref: '#/components/schemas/OpenAITopLogProb' + additionalProperties: false + required: + - token + - logprob + - top_logprobs + title: OpenAITokenLogProb + description: >- + The log probability for a token from an OpenAI-compatible chat completion + response. + OpenAIToolMessageParam: + type: object + properties: + role: + type: string + const: tool + default: tool + description: >- + Must be "tool" to identify this as a tool response + tool_call_id: + type: string + description: >- + Unique identifier for the tool call this response is for + content: + oneOf: + - type: string + - type: array + items: + $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + description: The response content from the tool + additionalProperties: false + required: + - role + - tool_call_id + - content + title: OpenAIToolMessageParam + description: >- + A message representing the result of a tool invocation in an OpenAI-compatible + chat completion request. + OpenAITopLogProb: + type: object + properties: + token: + type: string + bytes: + type: array + items: + type: integer + logprob: + type: number + additionalProperties: false + required: + - token + - logprob + title: OpenAITopLogProb + description: >- + The top log probability for a token from an OpenAI-compatible chat completion + response. + OpenAIUserMessageParam: + type: object + properties: + role: + type: string + const: user + default: user + description: >- + Must be "user" to identify this as a user message + content: + oneOf: + - type: string + - type: array + items: + $ref: '#/components/schemas/OpenAIChatCompletionContentPartParam' + description: >- + The content of the message, which can include text and other media + name: + type: string + description: >- + (Optional) The name of the user message participant. 
+ additionalProperties: false + required: + - role + - content + title: OpenAIUserMessageParam + description: >- + A message from the user in an OpenAI-compatible chat completion request. + OpenAIJSONSchema: + type: object + properties: + name: + type: string + description: Name of the schema + description: + type: string + description: (Optional) Description of the schema + strict: + type: boolean + description: >- + (Optional) Whether to enforce strict adherence to the schema + schema: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: (Optional) The JSON schema definition + additionalProperties: false + required: + - name + title: OpenAIJSONSchema + description: >- + JSON schema specification for OpenAI-compatible structured response format. + OpenAIResponseFormatJSONObject: + type: object + properties: + type: + type: string + const: json_object + default: json_object + description: >- + Must be "json_object" to indicate generic JSON object response format + additionalProperties: false + required: + - type + title: OpenAIResponseFormatJSONObject + description: >- + JSON object response format for OpenAI-compatible chat completion requests. + OpenAIResponseFormatJSONSchema: + type: object + properties: + type: + type: string + const: json_schema + default: json_schema + description: >- + Must be "json_schema" to indicate structured JSON response format + json_schema: + $ref: '#/components/schemas/OpenAIJSONSchema' + description: >- + The JSON schema specification for the response + additionalProperties: false + required: + - type + - json_schema + title: OpenAIResponseFormatJSONSchema + description: >- + JSON schema response format for OpenAI-compatible chat completion requests. + OpenAIResponseFormatParam: + oneOf: + - $ref: '#/components/schemas/OpenAIResponseFormatText' + - $ref: '#/components/schemas/OpenAIResponseFormatJSONSchema' + - $ref: '#/components/schemas/OpenAIResponseFormatJSONObject' + discriminator: + propertyName: type + mapping: + text: '#/components/schemas/OpenAIResponseFormatText' + json_schema: '#/components/schemas/OpenAIResponseFormatJSONSchema' + json_object: '#/components/schemas/OpenAIResponseFormatJSONObject' + OpenAIResponseFormatText: + type: object + properties: + type: + type: string + const: text + default: text + description: >- + Must be "text" to indicate plain text response format + additionalProperties: false + required: + - type + title: OpenAIResponseFormatText + description: >- + Text response format for OpenAI-compatible chat completion requests. + OpenAIChatCompletionRequestWithExtraBody: + type: object + properties: + model: + type: string + description: >- + The identifier of the model to use. The model must be registered with + Llama Stack and available via the /models endpoint. + messages: + type: array + items: + $ref: '#/components/schemas/OpenAIMessageParam' + description: List of messages in the conversation. + frequency_penalty: + type: number + description: >- + (Optional) The penalty for repeated tokens. + function_call: + oneOf: + - type: string + - type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: (Optional) The function call to use. 
+ functions: + type: array + items: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: (Optional) List of functions to use. + logit_bias: + type: object + additionalProperties: + type: number + description: (Optional) The logit bias to use. + logprobs: + type: boolean + description: (Optional) The log probabilities to use. + max_completion_tokens: + type: integer + description: >- + (Optional) The maximum number of tokens to generate. + max_tokens: + type: integer + description: >- + (Optional) The maximum number of tokens to generate. + n: + type: integer + description: >- + (Optional) The number of completions to generate. + parallel_tool_calls: + type: boolean + description: >- + (Optional) Whether to parallelize tool calls. + presence_penalty: + type: number + description: >- + (Optional) The penalty for repeated tokens. + response_format: + $ref: '#/components/schemas/OpenAIResponseFormatParam' + description: (Optional) The response format to use. + seed: + type: integer + description: (Optional) The seed to use. + stop: + oneOf: + - type: string + - type: array + items: + type: string + description: (Optional) The stop tokens to use. + stream: + type: boolean + description: >- + (Optional) Whether to stream the response. + stream_options: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: (Optional) The stream options to use. + temperature: + type: number + description: (Optional) The temperature to use. + tool_choice: + oneOf: + - type: string + - type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: (Optional) The tool choice to use. + tools: + type: array + items: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: (Optional) The tools to use. + top_logprobs: + type: integer + description: >- + (Optional) The top log probabilities to use. + top_p: + type: number + description: (Optional) The top p to use. + user: + type: string + description: (Optional) The user to use. + additionalProperties: false + required: + - model + - messages + title: OpenAIChatCompletionRequestWithExtraBody + description: >- + Request parameters for OpenAI-compatible chat completion endpoint. + OpenAIChatCompletion: + type: object + properties: + id: + type: string + description: The ID of the chat completion + choices: + type: array + items: + $ref: '#/components/schemas/OpenAIChoice' + description: List of choices + object: + type: string + const: chat.completion + default: chat.completion + description: >- + The object type, which will be "chat.completion" + created: + type: integer + description: >- + The Unix timestamp in seconds when the chat completion was created + model: + type: string + description: >- + The model that was used to generate the chat completion + usage: + $ref: '#/components/schemas/OpenAIChatCompletionUsage' + description: >- + Token usage information for the completion + additionalProperties: false + required: + - id + - choices + - object + - created + - model + title: OpenAIChatCompletion + description: >- + Response from an OpenAI-compatible chat completion request. 
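Only `model` and `messages` are required by OpenAIChatCompletionRequestWithExtraBody; everything else is an optional knob. A minimal request-body sketch (the model id is a placeholder, and the serving path is outside this excerpt):

```python
request_body = {
    # Required by OpenAIChatCompletionRequestWithExtraBody:
    "model": "my-registered-model",  # placeholder; must be listed via /models
    "messages": [
        {"role": "system", "content": "You are a terse assistant."},
        {"role": "user", "content": "Say hello."},
    ],
    # A few of the optional knobs defined above:
    "temperature": 0.2,
    "stream": False,
    "response_format": {"type": "text"},  # OpenAIResponseFormatText
}
```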
+ OpenAIChatCompletionChunk: + type: object + properties: + id: + type: string + description: The ID of the chat completion + choices: + type: array + items: + $ref: '#/components/schemas/OpenAIChunkChoice' + description: List of choices + object: + type: string + const: chat.completion.chunk + default: chat.completion.chunk + description: >- + The object type, which will be "chat.completion.chunk" + created: + type: integer + description: >- + The Unix timestamp in seconds when the chat completion was created + model: + type: string + description: >- + The model that was used to generate the chat completion + usage: + $ref: '#/components/schemas/OpenAIChatCompletionUsage' + description: >- + Token usage information (typically included in final chunk with stream_options) + additionalProperties: false + required: + - id + - choices + - object + - created + - model + title: OpenAIChatCompletionChunk + description: >- + Chunk from a streaming response to an OpenAI-compatible chat completion request. + OpenAIChoiceDelta: + type: object + properties: + content: + type: string + description: (Optional) The content of the delta + refusal: + type: string + description: (Optional) The refusal of the delta + role: + type: string + description: (Optional) The role of the delta + tool_calls: + type: array + items: + $ref: '#/components/schemas/OpenAIChatCompletionToolCall' + description: (Optional) The tool calls of the delta + reasoning_content: + type: string + description: >- + (Optional) The reasoning content from the model (non-standard, for o1/o3 + models) + additionalProperties: false + title: OpenAIChoiceDelta + description: >- + A delta from an OpenAI-compatible chat completion streaming response. + OpenAIChunkChoice: + type: object + properties: + delta: + $ref: '#/components/schemas/OpenAIChoiceDelta' + description: The delta from the chunk + finish_reason: + type: string + description: The reason the model stopped generating + index: + type: integer + description: The index of the choice + logprobs: + $ref: '#/components/schemas/OpenAIChoiceLogprobs' + description: >- + (Optional) The log probabilities for the tokens in the message + additionalProperties: false + required: + - delta + - finish_reason + - index + title: OpenAIChunkChoice + description: >- + A chunk choice from an OpenAI-compatible chat completion streaming response. + OpenAICompletionWithInputMessages: + type: object + properties: + id: + type: string + description: The ID of the chat completion + choices: + type: array + items: + $ref: '#/components/schemas/OpenAIChoice' + description: List of choices + object: + type: string + const: chat.completion + default: chat.completion + description: >- + The object type, which will be "chat.completion" + created: + type: integer + description: >- + The Unix timestamp in seconds when the chat completion was created + model: + type: string + description: >- + The model that was used to generate the chat completion + usage: + $ref: '#/components/schemas/OpenAIChatCompletionUsage' + description: >- + Token usage information for the completion + input_messages: + type: array + items: + $ref: '#/components/schemas/OpenAIMessageParam' + additionalProperties: false + required: + - id + - choices + - object + - created + - model + - input_messages + title: OpenAICompletionWithInputMessages + OpenAICompletionRequestWithExtraBody: + type: object + properties: + model: + type: string + description: >- + The identifier of the model to use. 
The model must be registered with + Llama Stack and available via the /models endpoint. + prompt: + oneOf: + - type: string + - type: array + items: + type: string + - type: array + items: + type: integer + - type: array + items: + type: array + items: + type: integer + description: The prompt to generate a completion for. + best_of: + type: integer + description: >- + (Optional) The number of completions to generate. + echo: + type: boolean + description: (Optional) Whether to echo the prompt. + frequency_penalty: + type: number + description: >- + (Optional) The penalty for repeated tokens. + logit_bias: + type: object + additionalProperties: + type: number + description: (Optional) The logit bias to use. + logprobs: + type: boolean + description: (Optional) The log probabilities to use. + max_tokens: + type: integer + description: >- + (Optional) The maximum number of tokens to generate. + n: + type: integer + description: >- + (Optional) The number of completions to generate. + presence_penalty: + type: number + description: >- + (Optional) The penalty for repeated tokens. + seed: + type: integer + description: (Optional) The seed to use. + stop: + oneOf: + - type: string + - type: array + items: + type: string + description: (Optional) The stop tokens to use. + stream: + type: boolean + description: >- + (Optional) Whether to stream the response. + stream_options: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: (Optional) The stream options to use. + temperature: + type: number + description: (Optional) The temperature to use. + top_p: + type: number + description: (Optional) The top p to use. + user: + type: string + description: (Optional) The user to use. + suffix: + type: string + description: >- + (Optional) The suffix that should be appended to the completion. + additionalProperties: false + required: + - model + - prompt + title: OpenAICompletionRequestWithExtraBody + description: >- + Request parameters for OpenAI-compatible completion endpoint. + OpenAICompletion: + type: object + properties: + id: + type: string + choices: + type: array + items: + $ref: '#/components/schemas/OpenAICompletionChoice' + created: + type: integer + model: + type: string + object: + type: string + const: text_completion + default: text_completion + additionalProperties: false + required: + - id + - choices + - created + - model + - object + title: OpenAICompletion + description: >- + Response from an OpenAI-compatible completion request. + OpenAICompletionChoice: + type: object + properties: + finish_reason: + type: string + text: + type: string + index: + type: integer + logprobs: + $ref: '#/components/schemas/OpenAIChoiceLogprobs' + additionalProperties: false + required: + - finish_reason + - text + - index + title: OpenAICompletionChoice + description: >- + A choice from an OpenAI-compatible completion response. 
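When `stream` is true, the server emits OpenAIChatCompletionChunk objects whose choices carry deltas rather than full messages. A sketch of reassembling the text, assuming the chunks have already been parsed from the transport (e.g. SSE):

```python
def collect_stream(chunks) -> str:
    """Accumulate OpenAIChatCompletionChunk dicts into the final text."""
    parts = []
    for chunk in chunks:  # each has object == "chat.completion.chunk"
        for choice in chunk["choices"]:  # OpenAIChunkChoice
            delta = choice["delta"]      # OpenAIChoiceDelta
            if delta.get("content"):     # content is optional per the schema
                parts.append(delta["content"])
    return "".join(parts)
```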
+ ConversationItem: + oneOf: + - $ref: '#/components/schemas/OpenAIResponseMessage' + - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + - $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalResponse' + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + discriminator: + propertyName: type + mapping: + message: '#/components/schemas/OpenAIResponseMessage' + web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + function_call_output: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' + mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + mcp_approval_response: '#/components/schemas/OpenAIResponseMCPApprovalResponse' + mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + OpenAIResponseAnnotationCitation: + type: object + properties: + type: + type: string + const: url_citation + default: url_citation + description: >- + Annotation type identifier, always "url_citation" + end_index: + type: integer + description: >- + End position of the citation span in the content + start_index: + type: integer + description: >- + Start position of the citation span in the content + title: + type: string + description: Title of the referenced web resource + url: + type: string + description: URL of the referenced web resource + additionalProperties: false + required: + - type + - end_index + - start_index + - title + - url + title: OpenAIResponseAnnotationCitation + description: >- + URL citation annotation for referencing external web resources. + "OpenAIResponseAnnotationContainerFileCitation": + type: object + properties: + type: + type: string + const: container_file_citation + default: container_file_citation + container_id: + type: string + end_index: + type: integer + file_id: + type: string + filename: + type: string + start_index: + type: integer + additionalProperties: false + required: + - type + - container_id + - end_index + - file_id + - filename + - start_index + title: >- + OpenAIResponseAnnotationContainerFileCitation + OpenAIResponseAnnotationFileCitation: + type: object + properties: + type: + type: string + const: file_citation + default: file_citation + description: >- + Annotation type identifier, always "file_citation" + file_id: + type: string + description: Unique identifier of the referenced file + filename: + type: string + description: Name of the referenced file + index: + type: integer + description: >- + Position index of the citation within the content + additionalProperties: false + required: + - type + - file_id + - filename + - index + title: OpenAIResponseAnnotationFileCitation + description: >- + File citation annotation for referencing specific files in response content. 
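ConversationItem is a discriminated union keyed on `type`, so consumers can dispatch on that one field. A sketch using only fields the individual variant schemas (defined above and below) mark as required:

```python
def describe_item(item: dict) -> str:
    """Dispatch on the ConversationItem discriminator ('type')."""
    kind = item["type"]
    if kind == "message":
        return f"message from {item['role']}"
    if kind == "function_call":
        return f"call to {item['name']} ({item['call_id']})"
    if kind == "function_call_output":
        return f"output for {item['call_id']}"
    if kind in ("mcp_call", "mcp_list_tools"):
        return f"{kind} via {item['server_label']}"
    return kind  # web_search_call, file_search_call, approvals, ...
```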
+ OpenAIResponseAnnotationFilePath: + type: object + properties: + type: + type: string + const: file_path + default: file_path + file_id: + type: string + index: + type: integer + additionalProperties: false + required: + - type + - file_id + - index + title: OpenAIResponseAnnotationFilePath + OpenAIResponseAnnotations: + oneOf: + - $ref: '#/components/schemas/OpenAIResponseAnnotationFileCitation' + - $ref: '#/components/schemas/OpenAIResponseAnnotationCitation' + - $ref: '#/components/schemas/OpenAIResponseAnnotationContainerFileCitation' + - $ref: '#/components/schemas/OpenAIResponseAnnotationFilePath' + discriminator: + propertyName: type + mapping: + file_citation: '#/components/schemas/OpenAIResponseAnnotationFileCitation' + url_citation: '#/components/schemas/OpenAIResponseAnnotationCitation' + container_file_citation: '#/components/schemas/OpenAIResponseAnnotationContainerFileCitation' + file_path: '#/components/schemas/OpenAIResponseAnnotationFilePath' + OpenAIResponseContentPartRefusal: + type: object + properties: + type: + type: string + const: refusal + default: refusal + description: >- + Content part type identifier, always "refusal" + refusal: + type: string + description: Refusal text supplied by the model + additionalProperties: false + required: + - type + - refusal + title: OpenAIResponseContentPartRefusal + description: >- + Refusal content within a streamed response part. + "OpenAIResponseInputFunctionToolCallOutput": + type: object + properties: + call_id: + type: string + output: + type: string + type: + type: string + const: function_call_output + default: function_call_output + id: + type: string + status: + type: string + additionalProperties: false + required: + - call_id + - output + - type + title: >- + OpenAIResponseInputFunctionToolCallOutput + description: >- + This represents the output of a function call that gets passed back to the + model. + OpenAIResponseInputMessageContent: + oneOf: + - $ref: '#/components/schemas/OpenAIResponseInputMessageContentText' + - $ref: '#/components/schemas/OpenAIResponseInputMessageContentImage' + discriminator: + propertyName: type + mapping: + input_text: '#/components/schemas/OpenAIResponseInputMessageContentText' + input_image: '#/components/schemas/OpenAIResponseInputMessageContentImage' + OpenAIResponseInputMessageContentImage: + type: object + properties: + detail: + oneOf: + - type: string + const: low + - type: string + const: high + - type: string + const: auto + default: auto + description: >- + Level of detail for image processing, can be "low", "high", or "auto" + type: + type: string + const: input_image + default: input_image + description: >- + Content type identifier, always "input_image" + image_url: + type: string + description: (Optional) URL of the image content + additionalProperties: false + required: + - detail + - type + title: OpenAIResponseInputMessageContentImage + description: >- + Image content for input messages in OpenAI response format. + OpenAIResponseInputMessageContentText: + type: object + properties: + text: + type: string + description: The text content of the input message + type: + type: string + const: input_text + default: input_text + description: >- + Content type identifier, always "input_text" + additionalProperties: false + required: + - text + - type + title: OpenAIResponseInputMessageContentText + description: >- + Text content for input messages in OpenAI response format. 
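Note that these Responses-style input parts use `input_text`/`input_image` type tags (with `image_url` as a plain string) rather than the chat-completion part shapes earlier in this spec. A sketch of an input content array; the URL is a placeholder:

```python
# Input content for a Responses-API message, per the schemas above.
input_content = [
    {"type": "input_text", "text": "Describe the attached chart."},
    {
        "type": "input_image",
        "detail": "auto",  # "low" | "high" | "auto"; required by the schema
        "image_url": "https://example.com/chart.png",  # optional plain string
    },
]
```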
+ OpenAIResponseMCPApprovalRequest: + type: object + properties: + arguments: + type: string + id: + type: string + name: + type: string + server_label: + type: string + type: + type: string + const: mcp_approval_request + default: mcp_approval_request + additionalProperties: false + required: + - arguments + - id + - name + - server_label + - type + title: OpenAIResponseMCPApprovalRequest + description: >- + A request for human approval of a tool invocation. + OpenAIResponseMCPApprovalResponse: + type: object + properties: + approval_request_id: + type: string + approve: + type: boolean + type: + type: string + const: mcp_approval_response + default: mcp_approval_response + id: + type: string + reason: + type: string + additionalProperties: false + required: + - approval_request_id + - approve + - type + title: OpenAIResponseMCPApprovalResponse + description: A response to an MCP approval request. + OpenAIResponseMessage: + type: object + properties: + content: + oneOf: + - type: string + - type: array + items: + $ref: '#/components/schemas/OpenAIResponseInputMessageContent' + - type: array + items: + $ref: '#/components/schemas/OpenAIResponseOutputMessageContent' + role: + oneOf: + - type: string + const: system + - type: string + const: developer + - type: string + const: user + - type: string + const: assistant + type: + type: string + const: message + default: message + id: + type: string + status: + type: string + additionalProperties: false + required: + - content + - role + - type + title: OpenAIResponseMessage + description: >- + Corresponds to the various Message types in the Responses API. They are all + under one type because the Responses API gives them all the same "type" value, + and there is no way to tell them apart in certain scenarios. 
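An mcp_approval_request item is answered by appending an mcp_approval_response item that echoes the request's `id` as `approval_request_id`. A sketch; the helper name is illustrative:

```python
def approve(request, ok, reason=None):
    """Build an mcp_approval_response item answering `request`."""
    assert request["type"] == "mcp_approval_request"
    response = {
        "type": "mcp_approval_response",
        "approval_request_id": request["id"],
        "approve": ok,
    }
    if reason is not None:
        response["reason"] = reason  # optional field
    return response
```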
+ OpenAIResponseOutputMessageContent: + oneOf: + - $ref: '#/components/schemas/OpenAIResponseOutputMessageContentOutputText' + - $ref: '#/components/schemas/OpenAIResponseContentPartRefusal' + discriminator: + propertyName: type + mapping: + output_text: '#/components/schemas/OpenAIResponseOutputMessageContentOutputText' + refusal: '#/components/schemas/OpenAIResponseContentPartRefusal' + "OpenAIResponseOutputMessageContentOutputText": + type: object + properties: + text: + type: string + type: + type: string + const: output_text + default: output_text + annotations: + type: array + items: + $ref: '#/components/schemas/OpenAIResponseAnnotations' + additionalProperties: false + required: + - text + - type + - annotations + title: >- + OpenAIResponseOutputMessageContentOutputText + "OpenAIResponseOutputMessageFileSearchToolCall": + type: object + properties: + id: + type: string + description: Unique identifier for this tool call + queries: + type: array + items: + type: string + description: List of search queries executed + status: + type: string + description: >- + Current status of the file search operation + type: + type: string + const: file_search_call + default: file_search_call + description: >- + Tool call type identifier, always "file_search_call" + results: + type: array + items: + type: object + properties: + attributes: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + (Optional) Key-value attributes associated with the file + file_id: + type: string + description: >- + Unique identifier of the file containing the result + filename: + type: string + description: Name of the file containing the result + score: + type: number + description: >- + Relevance score for this search result (between 0 and 1) + text: + type: string + description: Text content of the search result + additionalProperties: false + required: + - attributes + - file_id + - filename + - score + - text + title: >- + OpenAIResponseOutputMessageFileSearchToolCallResults + description: >- + Search results returned by the file search operation. + description: >- + (Optional) Search results returned by the file search operation + additionalProperties: false + required: + - id + - queries + - status + - type + title: >- + OpenAIResponseOutputMessageFileSearchToolCall + description: >- + File search tool call output message for OpenAI responses. + "OpenAIResponseOutputMessageFunctionToolCall": + type: object + properties: + call_id: + type: string + description: Unique identifier for the function call + name: + type: string + description: Name of the function being called + arguments: + type: string + description: >- + JSON string containing the function arguments + type: + type: string + const: function_call + default: function_call + description: >- + Tool call type identifier, always "function_call" + id: + type: string + description: >- + (Optional) Additional identifier for the tool call + status: + type: string + description: >- + (Optional) Current status of the function call execution + additionalProperties: false + required: + - call_id + - name + - arguments + - type + title: >- + OpenAIResponseOutputMessageFunctionToolCall + description: >- + Function tool call output message for OpenAI responses. 
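For a function_call item, `arguments` arrives as a JSON string, and the reply is a function_call_output item (OpenAIResponseInputFunctionToolCallOutput above) whose `output` is also a string. A sketch, assuming a caller-supplied mapping of tool names to Python callables:

```python
import json

def run_function_call(item: dict, tools: dict) -> dict:
    """Execute a function_call item and wrap the result as
    function_call_output. `tools` maps names to callables (assumption)."""
    assert item["type"] == "function_call"
    args = json.loads(item["arguments"])  # arguments arrive as a JSON string
    result = tools[item["name"]](**args)
    return {
        "type": "function_call_output",
        "call_id": item["call_id"],
        "output": json.dumps(result),  # 'output' is a string per the schema
    }
```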
+ OpenAIResponseOutputMessageMCPCall: + type: object + properties: + id: + type: string + description: Unique identifier for this MCP call + type: + type: string + const: mcp_call + default: mcp_call + description: >- + Tool call type identifier, always "mcp_call" + arguments: + type: string + description: >- + JSON string containing the MCP call arguments + name: + type: string + description: Name of the MCP method being called + server_label: + type: string + description: >- + Label identifying the MCP server handling the call + error: + type: string + description: >- + (Optional) Error message if the MCP call failed + output: + type: string + description: >- + (Optional) Output result from the successful MCP call + additionalProperties: false + required: + - id + - type + - arguments + - name + - server_label + title: OpenAIResponseOutputMessageMCPCall + description: >- + Model Context Protocol (MCP) call output message for OpenAI responses. + OpenAIResponseOutputMessageMCPListTools: + type: object + properties: + id: + type: string + description: >- + Unique identifier for this MCP list tools operation + type: + type: string + const: mcp_list_tools + default: mcp_list_tools + description: >- + Tool call type identifier, always "mcp_list_tools" + server_label: + type: string + description: >- + Label identifying the MCP server providing the tools + tools: + type: array + items: + type: object + properties: + input_schema: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + JSON schema defining the tool's input parameters + name: + type: string + description: Name of the tool + description: + type: string + description: >- + (Optional) Description of what the tool does + additionalProperties: false + required: + - input_schema + - name + title: MCPListToolsTool + description: >- + Tool definition returned by MCP list tools operation. + description: >- + List of available tools provided by the MCP server + additionalProperties: false + required: + - id + - type + - server_label + - tools + title: OpenAIResponseOutputMessageMCPListTools + description: >- + MCP list tools output message containing available tools from an MCP server. + "OpenAIResponseOutputMessageWebSearchToolCall": + type: object + properties: + id: + type: string + description: Unique identifier for this tool call + status: + type: string + description: >- + Current status of the web search operation + type: + type: string + const: web_search_call + default: web_search_call + description: >- + Tool call type identifier, always "web_search_call" + additionalProperties: false + required: + - id + - status + - type + title: >- + OpenAIResponseOutputMessageWebSearchToolCall + description: >- + Web search tool call output message for OpenAI responses. + CreateConversationRequest: + type: object + properties: + items: + type: array + items: + $ref: '#/components/schemas/ConversationItem' + description: >- + Initial items to include in the conversation context. + metadata: + type: object + additionalProperties: + type: string + description: >- + Set of key-value pairs that can be attached to an object. 
+      additionalProperties: false
+      title: CreateConversationRequest
+    Conversation:
+      type: object
+      properties:
+        id:
+          type: string
+        object:
+          type: string
+          const: conversation
+          default: conversation
+        created_at:
+          type: integer
+        metadata:
+          type: object
+          additionalProperties:
+            type: string
+        items:
+          type: array
+          items:
+            type: object
+            description: >-
+              A single conversation item, expressed as a free-form JSON object.
+      additionalProperties: false
+      required:
+        - id
+        - object
+        - created_at
+      title: Conversation
+      description: OpenAI-compatible conversation object.
+    UpdateConversationRequest:
+      type: object
+      properties:
+        metadata:
+          type: object
+          additionalProperties:
+            type: string
+          description: >-
+            Set of key-value pairs that can be attached to an object.
+      additionalProperties: false
+      required:
+        - metadata
+      title: UpdateConversationRequest
+    ConversationDeletedResource:
+      type: object
+      properties:
+        id:
+          type: string
+        object:
+          type: string
+          default: conversation.deleted
+        deleted:
+          type: boolean
+          default: true
+      additionalProperties: false
+      required:
+        - id
+        - object
+        - deleted
+      title: ConversationDeletedResource
+      description: Response for deleted conversation.
+    ConversationItemList:
+      type: object
+      properties:
+        object:
+          type: string
+          default: list
+        data:
+          type: array
+          items:
+            $ref: '#/components/schemas/ConversationItem'
+        first_id:
+          type: string
+        last_id:
+          type: string
+        has_more:
+          type: boolean
+          default: false
+      additionalProperties: false
+      required:
+        - object
+        - data
+        - has_more
+      title: ConversationItemList
+      description: >-
+        List of conversation items with pagination.
+    AddItemsRequest:
+      type: object
+      properties:
+        items:
+          type: array
+          items:
+            $ref: '#/components/schemas/ConversationItem'
+          description: >-
+            Items to include in the conversation context.
+      additionalProperties: false
+      required:
+        - items
+      title: AddItemsRequest
+    ConversationItemDeletedResource:
+      type: object
+      properties:
+        id:
+          type: string
+        object:
+          type: string
+          default: conversation.item.deleted
+        deleted:
+          type: boolean
+          default: true
+      additionalProperties: false
+      required:
+        - id
+        - object
+        - deleted
+      title: ConversationItemDeletedResource
+      description: Response for deleted conversation item.
+    OpenAIEmbeddingsRequestWithExtraBody:
+      type: object
+      properties:
+        model:
+          type: string
+          description: >-
+            The identifier of the model to use. The model must be an embedding model
+            registered with Llama Stack and available via the /models endpoint.
+        input:
+          oneOf:
+            - type: string
+            - type: array
+              items:
+                type: string
+          description: >-
+            Input text to embed, encoded as a string or array of strings. To embed
+            multiple inputs in a single request, pass an array of strings.
+        encoding_format:
+          type: string
+          default: float
+          description: >-
+            (Optional) The format to return the embeddings in. Can be either "float"
+            or "base64". Defaults to "float".
+        dimensions:
+          type: integer
+          description: >-
+            (Optional) The number of dimensions the resulting output embeddings should
+            have. Only supported in text-embedding-3 and later models.
+ user: + type: string + description: >- + (Optional) A unique identifier representing your end-user, which can help + OpenAI to monitor and detect abuse. + additionalProperties: false + required: + - model + - input + title: OpenAIEmbeddingsRequestWithExtraBody + description: >- + Request parameters for OpenAI-compatible embeddings endpoint. + OpenAIEmbeddingData: + type: object + properties: + object: + type: string + const: embedding + default: embedding + description: >- + The object type, which will be "embedding" + embedding: + oneOf: + - type: array + items: + type: number + - type: string + description: >- + The embedding vector as a list of floats (when encoding_format="float") + or as a base64-encoded string (when encoding_format="base64") + index: + type: integer + description: >- + The index of the embedding in the input list + additionalProperties: false + required: + - object + - embedding + - index + title: OpenAIEmbeddingData + description: >- + A single embedding data object from an OpenAI-compatible embeddings response. + OpenAIEmbeddingUsage: + type: object + properties: + prompt_tokens: + type: integer + description: The number of tokens in the input + total_tokens: + type: integer + description: The total number of tokens used + additionalProperties: false + required: + - prompt_tokens + - total_tokens + title: OpenAIEmbeddingUsage + description: >- + Usage information for an OpenAI-compatible embeddings response. + OpenAIEmbeddingsResponse: + type: object + properties: + object: + type: string + const: list + default: list + description: The object type, which will be "list" + data: + type: array + items: + $ref: '#/components/schemas/OpenAIEmbeddingData' + description: List of embedding data objects + model: + type: string + description: >- + The model that was used to generate the embeddings + usage: + $ref: '#/components/schemas/OpenAIEmbeddingUsage' + description: Usage information + additionalProperties: false + required: + - object + - data + - model + - usage + title: OpenAIEmbeddingsResponse + description: >- + Response from an OpenAI-compatible embeddings request. + OpenAIFilePurpose: + type: string + enum: + - assistants + - batch + title: OpenAIFilePurpose + description: >- + Valid purpose values for OpenAI Files API. + ListOpenAIFileResponse: + type: object + properties: + data: + type: array + items: + $ref: '#/components/schemas/OpenAIFileObject' + description: List of file objects + has_more: + type: boolean + description: >- + Whether there are more files available beyond this page + first_id: + type: string + description: >- + ID of the first file in the list for pagination + last_id: + type: string + description: >- + ID of the last file in the list for pagination + object: + type: string + const: list + default: list + description: The object type, which is always "list" + additionalProperties: false + required: + - data + - has_more + - first_id + - last_id + - object + title: ListOpenAIFileResponse + description: >- + Response for listing files in OpenAI Files API. 
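Per OpenAIEmbeddingData, `embedding` is either a float array or a base64 string depending on `encoding_format`. A sketch of building a request and decoding either form; the base64 branch assumes the usual OpenAI packing of little-endian float32 values, and the model id is a placeholder:

```python
import base64
import struct

request_body = {
    "model": "my-embedding-model",  # placeholder embedding model id
    "input": ["first text", "second text"],
    "encoding_format": "base64",    # or "float" (the default)
}

def decode_embedding(data: dict):
    """Decode one OpenAIEmbeddingData entry from either encoding."""
    emb = data["embedding"]
    if isinstance(emb, str):  # assumed base64-packed little-endian float32s
        raw = base64.b64decode(emb)
        return list(struct.unpack(f"<{len(raw) // 4}f", raw))
    return emb  # already a list of floats
```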
+ OpenAIFileObject: + type: object + properties: + object: + type: string + const: file + default: file + description: The object type, which is always "file" + id: + type: string + description: >- + The file identifier, which can be referenced in the API endpoints + bytes: + type: integer + description: The size of the file, in bytes + created_at: + type: integer + description: >- + The Unix timestamp (in seconds) for when the file was created + expires_at: + type: integer + description: >- + The Unix timestamp (in seconds) for when the file expires + filename: + type: string + description: The name of the file + purpose: + type: string + enum: + - assistants + - batch + description: The intended purpose of the file + additionalProperties: false + required: + - object + - id + - bytes + - created_at + - expires_at + - filename + - purpose + title: OpenAIFileObject + description: >- + OpenAI File object as defined in the OpenAI Files API. + ExpiresAfter: + type: object + properties: + anchor: + type: string + const: created_at + seconds: + type: integer + additionalProperties: false + required: + - anchor + - seconds + title: ExpiresAfter + description: >- + Control expiration of uploaded files. + + Params: + - anchor, must be "created_at" + - seconds, must be int between 3600 and 2592000 (1 hour to 30 days) + OpenAIFileDeleteResponse: + type: object + properties: + id: + type: string + description: The file identifier that was deleted + object: + type: string + const: file + default: file + description: The object type, which is always "file" + deleted: + type: boolean + description: >- + Whether the file was successfully deleted + additionalProperties: false + required: + - id + - object + - deleted + title: OpenAIFileDeleteResponse + description: >- + Response for deleting a file in OpenAI Files API. + Response: + type: object + title: Response + HealthInfo: + type: object + properties: + status: + type: string + enum: + - OK + - Error + - Not Implemented + description: Current health status of the service + additionalProperties: false + required: + - status + title: HealthInfo + description: >- + Health status information for the service. + RouteInfo: + type: object + properties: + route: + type: string + description: The API endpoint path + method: + type: string + description: HTTP method for the route + provider_types: + type: array + items: + type: string + description: >- + List of provider types that implement this route + additionalProperties: false + required: + - route + - method + - provider_types + title: RouteInfo + description: >- + Information about an API route including its path, method, and implementing + providers. + ListRoutesResponse: + type: object + properties: + data: + type: array + items: + $ref: '#/components/schemas/RouteInfo' + description: >- + List of available route information objects + additionalProperties: false + required: + - data + title: ListRoutesResponse + description: >- + Response containing a list of all available API routes. 
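ExpiresAfter documents hard bounds (anchor fixed to "created_at", seconds in [3600, 2592000]), which are easy to validate client-side before upload. A sketch:

```python
def validate_expires_after(expires: dict) -> None:
    """Check an ExpiresAfter payload against the documented bounds."""
    if expires.get("anchor") != "created_at":
        raise ValueError("anchor must be 'created_at'")
    seconds = expires["seconds"]
    if not (3600 <= seconds <= 2592000):  # 1 hour .. 30 days
        raise ValueError("seconds must be between 3600 and 2592000")
```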
+ Model: + type: object + properties: + identifier: + type: string + description: >- + Unique identifier for this resource in llama stack + provider_resource_id: + type: string + description: >- + Unique identifier for this resource in the provider + provider_id: + type: string + description: >- + ID of the provider that owns this resource + type: + type: string + enum: + - model + - shield + - vector_store + - dataset + - scoring_function + - benchmark + - tool + - tool_group + - prompt + const: model + default: model + description: >- + The resource type, always 'model' for model resources + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: Any additional metadata for this model + model_type: + $ref: '#/components/schemas/ModelType' + default: llm + description: >- + The type of model (LLM or embedding model) + additionalProperties: false + required: + - identifier + - provider_id + - type + - metadata + - model_type + title: Model + description: >- + A model resource representing an AI model registered in Llama Stack. + ModelType: + type: string + enum: + - llm + - embedding + title: ModelType + description: >- + Enumeration of supported model types in Llama Stack. + ListModelsResponse: + type: object + properties: + data: + type: array + items: + $ref: '#/components/schemas/Model' + additionalProperties: false + required: + - data + title: ListModelsResponse + RegisterModelRequest: + type: object + properties: + model_id: + type: string + description: The identifier of the model to register. + provider_model_id: + type: string + description: >- + The identifier of the model in the provider. + provider_id: + type: string + description: The identifier of the provider. + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: Any additional metadata for this model. + model_type: + $ref: '#/components/schemas/ModelType' + description: The type of model to register. + additionalProperties: false + required: + - model_id + title: RegisterModelRequest + RunModerationRequest: + type: object + properties: + input: + oneOf: + - type: string + - type: array + items: + type: string + description: >- + Input (or inputs) to classify. Can be a single string, an array of strings, + or an array of multi-modal input objects similar to other models. + model: + type: string + description: >- + The content moderation model you would like to use. + additionalProperties: false + required: + - input + - model + title: RunModerationRequest + ModerationObject: + type: object + properties: + id: + type: string + description: >- + The unique identifier for the moderation request. + model: + type: string + description: >- + The model used to generate the moderation results. + results: + type: array + items: + $ref: '#/components/schemas/ModerationObjectResults' + description: A list of moderation objects + additionalProperties: false + required: + - id + - model + - results + title: ModerationObject + description: A moderation object. + ModerationObjectResults: + type: object + properties: + flagged: + type: boolean + description: >- + Whether any of the below categories are flagged. + categories: + type: object + additionalProperties: + type: boolean + description: >- + A list of the categories, and whether they are flagged or not. 
+        category_applied_input_types:
+          type: object
+          additionalProperties:
+            type: array
+            items:
+              type: string
+          description: >-
+            A list of the categories along with the input type(s) that the score applies
+            to.
+        category_scores:
+          type: object
+          additionalProperties:
+            type: number
+          description: >-
+            A list of the categories along with their scores as predicted by the model.
+        user_message:
+          type: string
+        metadata:
+          type: object
+          additionalProperties:
+            oneOf:
+              - type: 'null'
+              - type: boolean
+              - type: number
+              - type: string
+              - type: array
+              - type: object
+      additionalProperties: false
+      required:
+        - flagged
+        - metadata
+      title: ModerationObjectResults
+      description: A single moderation result.
+    Prompt:
+      type: object
+      properties:
+        prompt:
+          type: string
+          description: >-
+            The system prompt text with variable placeholders. Variables are only
+            supported when using the Responses API.
+        version:
+          type: integer
+          description: >-
+            Version (integer starting at 1, incremented on save)
+        prompt_id:
+          type: string
+          description: >-
+            Unique identifier formatted as 'pmpt_<48-digit-hash>'
+        variables:
+          type: array
+          items:
+            type: string
+          description: >-
+            List of prompt variable names that can be used in the prompt template
+        is_default:
+          type: boolean
+          default: false
+          description: >-
+            Boolean indicating whether this version is the default version for this
+            prompt
+      additionalProperties: false
+      required:
+        - version
+        - prompt_id
+        - variables
+        - is_default
+      title: Prompt
+      description: >-
+        A prompt resource representing a stored OpenAI-compatible prompt template
+        in Llama Stack.
+    ListPromptsResponse:
+      type: object
+      properties:
+        data:
+          type: array
+          items:
+            $ref: '#/components/schemas/Prompt'
+      additionalProperties: false
+      required:
+        - data
+      title: ListPromptsResponse
+      description: Response model to list prompts.
+    CreatePromptRequest:
+      type: object
+      properties:
+        prompt:
+          type: string
+          description: >-
+            The prompt text content with variable placeholders.
+        variables:
+          type: array
+          items:
+            type: string
+          description: >-
+            List of variable names that can be used in the prompt template.
+      additionalProperties: false
+      required:
+        - prompt
+      title: CreatePromptRequest
+    UpdatePromptRequest:
+      type: object
+      properties:
+        prompt:
+          type: string
+          description: The updated prompt text content.
+        version:
+          type: integer
+          description: >-
+            The current version of the prompt being updated.
+        variables:
+          type: array
+          items:
+            type: string
+          description: >-
+            Updated list of variable names that can be used in the prompt template.
+        set_as_default:
+          type: boolean
+          description: >-
+            Set the new version as the default (default=True).
+      additionalProperties: false
+      required:
+        - prompt
+        - version
+        - set_as_default
+      title: UpdatePromptRequest
+    SetDefaultVersionRequest:
+      type: object
+      properties:
+        version:
+          type: integer
+          description: The version to set as default.
+ additionalProperties: false + required: + - version + title: SetDefaultVersionRequest + ProviderInfo: + type: object + properties: + api: + type: string + description: The API name this provider implements + provider_id: + type: string + description: Unique identifier for the provider + provider_type: + type: string + description: The type of provider implementation + config: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + Configuration parameters for the provider + health: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: Current health status of the provider + additionalProperties: false + required: + - api + - provider_id + - provider_type + - config + - health + title: ProviderInfo + description: >- + Information about a registered provider including its configuration and health + status. + ListProvidersResponse: + type: object + properties: + data: + type: array + items: + $ref: '#/components/schemas/ProviderInfo' + description: List of provider information objects + additionalProperties: false + required: + - data + title: ListProvidersResponse + description: >- + Response containing a list of all available providers. + ListOpenAIResponseObject: + type: object + properties: + data: + type: array + items: + $ref: '#/components/schemas/OpenAIResponseObjectWithInput' + description: >- + List of response objects with their input context + has_more: + type: boolean + description: >- + Whether there are more results available beyond this page + first_id: + type: string + description: >- + Identifier of the first item in this page + last_id: + type: string + description: Identifier of the last item in this page + object: + type: string + const: list + default: list + description: Object type identifier, always "list" + additionalProperties: false + required: + - data + - has_more + - first_id + - last_id + - object + title: ListOpenAIResponseObject + description: >- + Paginated list of OpenAI response objects with navigation metadata. + OpenAIResponseError: + type: object + properties: + code: + type: string + description: >- + Error code identifying the type of failure + message: + type: string + description: >- + Human-readable error message describing the failure + additionalProperties: false + required: + - code + - message + title: OpenAIResponseError + description: >- + Error details for failed OpenAI response requests. 
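+    # Illustrative pagination flow (not part of the spec) for
+    # ListOpenAIResponseObject, assuming the list endpoint accepts an "after"
+    # cursor parameter as in the OpenAI API: when has_more is true, pass
+    # last_id as the cursor for the next request, e.g.
+    #   page 1 -> {"object": "list", "data": [...], "first_id": "resp_a",
+    #              "last_id": "resp_f", "has_more": true}
+    #   page 2 request then uses after=resp_f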
+ OpenAIResponseInput: + oneOf: + - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + - $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalResponse' + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + - $ref: '#/components/schemas/OpenAIResponseMessage' + OpenAIResponseInputToolFileSearch: + type: object + properties: + type: + type: string + const: file_search + default: file_search + description: >- + Tool type identifier, always "file_search" + vector_store_ids: + type: array + items: + type: string + description: >- + List of vector store identifiers to search within + filters: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + (Optional) Additional filters to apply to the search + max_num_results: + type: integer + default: 10 + description: >- + (Optional) Maximum number of search results to return (1-50) + ranking_options: + type: object + properties: + ranker: + type: string + description: >- + (Optional) Name of the ranking algorithm to use + score_threshold: + type: number + default: 0.0 + description: >- + (Optional) Minimum relevance score threshold for results + additionalProperties: false + description: >- + (Optional) Options for ranking and scoring search results + additionalProperties: false + required: + - type + - vector_store_ids + title: OpenAIResponseInputToolFileSearch + description: >- + File search tool configuration for OpenAI response inputs. + OpenAIResponseInputToolFunction: + type: object + properties: + type: + type: string + const: function + default: function + description: Tool type identifier, always "function" + name: + type: string + description: Name of the function that can be called + description: + type: string + description: >- + (Optional) Description of what the function does + parameters: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + (Optional) JSON schema defining the function's parameters + strict: + type: boolean + description: >- + (Optional) Whether to enforce strict parameter validation + additionalProperties: false + required: + - type + - name + title: OpenAIResponseInputToolFunction + description: >- + Function tool configuration for OpenAI response inputs. + OpenAIResponseInputToolWebSearch: + type: object + properties: + type: + oneOf: + - type: string + const: web_search + - type: string + const: web_search_preview + - type: string + const: web_search_preview_2025_03_11 + default: web_search + description: Web search tool type variant to use + search_context_size: + type: string + default: medium + description: >- + (Optional) Size of search context, must be "low", "medium", or "high" + additionalProperties: false + required: + - type + title: OpenAIResponseInputToolWebSearch + description: >- + Web search tool configuration for OpenAI response inputs. 
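+    # Illustrative "tools" entries (ids and names made up) matching the input
+    # tool schemas above:
+    #   [{"type": "web_search", "search_context_size": "medium"},
+    #    {"type": "file_search", "vector_store_ids": ["vs_123"], "max_num_results": 5},
+    #    {"type": "function", "name": "get_weather",
+    #     "parameters": {"type": "object", "properties": {"city": {"type": "string"}}}}]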
+ OpenAIResponseObjectWithInput: + type: object + properties: + created_at: + type: integer + description: >- + Unix timestamp when the response was created + error: + $ref: '#/components/schemas/OpenAIResponseError' + description: >- + (Optional) Error details if the response generation failed + id: + type: string + description: Unique identifier for this response + model: + type: string + description: Model identifier used for generation + object: + type: string + const: response + default: response + description: >- + Object type identifier, always "response" + output: + type: array + items: + $ref: '#/components/schemas/OpenAIResponseOutput' + description: >- + List of generated output items (messages, tool calls, etc.) + parallel_tool_calls: + type: boolean + default: false + description: >- + Whether tool calls can be executed in parallel + previous_response_id: + type: string + description: >- + (Optional) ID of the previous response in a conversation + status: + type: string + description: >- + Current status of the response generation + temperature: + type: number + description: >- + (Optional) Sampling temperature used for generation + text: + $ref: '#/components/schemas/OpenAIResponseText' + description: >- + Text formatting configuration for the response + top_p: + type: number + description: >- + (Optional) Nucleus sampling parameter used for generation + tools: + type: array + items: + $ref: '#/components/schemas/OpenAIResponseTool' + description: >- + (Optional) An array of tools the model may call while generating a response. + truncation: + type: string + description: >- + (Optional) Truncation strategy applied to the response + usage: + $ref: '#/components/schemas/OpenAIResponseUsage' + description: >- + (Optional) Token usage information for the response + instructions: + type: string + description: >- + (Optional) System message inserted into the model's context + input: + type: array + items: + $ref: '#/components/schemas/OpenAIResponseInput' + description: >- + List of input items that led to this response + additionalProperties: false + required: + - created_at + - id + - model + - object + - output + - parallel_tool_calls + - status + - text + - input + title: OpenAIResponseObjectWithInput + description: >- + OpenAI response object extended with input context information. 
+ OpenAIResponseOutput: + oneOf: + - $ref: '#/components/schemas/OpenAIResponseMessage' + - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + discriminator: + propertyName: type + mapping: + message: '#/components/schemas/OpenAIResponseMessage' + web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + OpenAIResponseText: + type: object + properties: + format: + type: object + properties: + type: + oneOf: + - type: string + const: text + - type: string + const: json_schema + - type: string + const: json_object + description: >- + Must be "text", "json_schema", or "json_object" to identify the format + type + name: + type: string + description: >- + The name of the response format. Only used for json_schema. + schema: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + The JSON schema the response should conform to. In a Python SDK, this + is often a `pydantic` model. Only used for json_schema. + description: + type: string + description: >- + (Optional) A description of the response format. Only used for json_schema. + strict: + type: boolean + description: >- + (Optional) Whether to strictly enforce the JSON schema. If true, the + response must match the schema exactly. Only used for json_schema. + additionalProperties: false + required: + - type + description: >- + (Optional) Text format configuration specifying output format requirements + additionalProperties: false + title: OpenAIResponseText + description: >- + Text response configuration for OpenAI responses. 
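+    # Illustrative OpenAIResponseText value (format name and schema made up)
+    # requesting strict JSON-schema output:
+    #   {"format": {"type": "json_schema", "name": "city_info",
+    #               "schema": {"type": "object", "properties": {"city": {"type": "string"}}},
+    #               "strict": true}}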
+    OpenAIResponseTool:
+      oneOf:
+        - $ref: '#/components/schemas/OpenAIResponseInputToolWebSearch'
+        - $ref: '#/components/schemas/OpenAIResponseInputToolFileSearch'
+        - $ref: '#/components/schemas/OpenAIResponseInputToolFunction'
+        - $ref: '#/components/schemas/OpenAIResponseToolMCP'
+      discriminator:
+        propertyName: type
+        mapping:
+          web_search: '#/components/schemas/OpenAIResponseInputToolWebSearch'
+          file_search: '#/components/schemas/OpenAIResponseInputToolFileSearch'
+          function: '#/components/schemas/OpenAIResponseInputToolFunction'
+          mcp: '#/components/schemas/OpenAIResponseToolMCP'
+    OpenAIResponseToolMCP:
+      type: object
+      properties:
+        type:
+          type: string
+          const: mcp
+          default: mcp
+          description: Tool type identifier, always "mcp"
+        server_label:
+          type: string
+          description: Label to identify this MCP server
+        allowed_tools:
+          oneOf:
+            - type: array
+              items:
+                type: string
+            - type: object
+              properties:
+                tool_names:
+                  type: array
+                  items:
+                    type: string
+                  description: >-
+                    (Optional) List of specific tool names that are allowed
+              additionalProperties: false
+              title: AllowedToolsFilter
+              description: >-
+                Filter configuration for restricting which MCP tools can be used.
+          description: >-
+            (Optional) Restriction on which tools can be used from this server
+      additionalProperties: false
+      required:
+        - type
+        - server_label
+      title: OpenAIResponseToolMCP
+      description: >-
+        Model Context Protocol (MCP) tool configuration for OpenAI response objects.
+    OpenAIResponseUsage:
+      type: object
+      properties:
+        input_tokens:
+          type: integer
+          description: Number of tokens in the input
+        output_tokens:
+          type: integer
+          description: Number of tokens in the output
+        total_tokens:
+          type: integer
+          description: Total tokens used (input + output)
+        input_tokens_details:
+          type: object
+          properties:
+            cached_tokens:
+              type: integer
+              description: Number of tokens retrieved from cache
+          additionalProperties: false
+          description: Detailed breakdown of input token usage
+        output_tokens_details:
+          type: object
+          properties:
+            reasoning_tokens:
+              type: integer
+              description: >-
+                Number of tokens used for reasoning (o1/o3 models)
+          additionalProperties: false
+          description: Detailed breakdown of output token usage
+      additionalProperties: false
+      required:
+        - input_tokens
+        - output_tokens
+        - total_tokens
+      title: OpenAIResponseUsage
+      description: Usage information for an OpenAI response.
+    ResponseGuardrailSpec:
+      type: object
+      properties:
+        type:
+          type: string
+          description: The type/identifier of the guardrail.
+      additionalProperties: false
+      required:
+        - type
+      title: ResponseGuardrailSpec
+      description: >-
+        Specification for a guardrail to apply during response generation.
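+    # Illustrative OpenAIResponseUsage value (numbers made up); total_tokens
+    # is simply input_tokens + output_tokens:
+    #   {"input_tokens": 120, "output_tokens": 45, "total_tokens": 165,
+    #    "input_tokens_details": {"cached_tokens": 0},
+    #    "output_tokens_details": {"reasoning_tokens": 12}}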
+    OpenAIResponseInputTool:
+      oneOf:
+        - $ref: '#/components/schemas/OpenAIResponseInputToolWebSearch'
+        - $ref: '#/components/schemas/OpenAIResponseInputToolFileSearch'
+        - $ref: '#/components/schemas/OpenAIResponseInputToolFunction'
+        - $ref: '#/components/schemas/OpenAIResponseInputToolMCP'
+      discriminator:
+        propertyName: type
+        mapping:
+          web_search: '#/components/schemas/OpenAIResponseInputToolWebSearch'
+          file_search: '#/components/schemas/OpenAIResponseInputToolFileSearch'
+          function: '#/components/schemas/OpenAIResponseInputToolFunction'
+          mcp: '#/components/schemas/OpenAIResponseInputToolMCP'
+    OpenAIResponseInputToolMCP:
+      type: object
+      properties:
+        type:
+          type: string
+          const: mcp
+          default: mcp
+          description: Tool type identifier, always "mcp"
+        server_label:
+          type: string
+          description: Label to identify this MCP server
+        server_url:
+          type: string
+          description: URL endpoint of the MCP server
+        headers:
+          type: object
+          additionalProperties:
+            oneOf:
+              - type: 'null'
+              - type: boolean
+              - type: number
+              - type: string
+              - type: array
+              - type: object
+          description: >-
+            (Optional) HTTP headers to include when connecting to the server
+        require_approval:
+          oneOf:
+            - type: string
+              const: always
+            - type: string
+              const: never
+            - type: object
+              properties:
+                always:
+                  type: array
+                  items:
+                    type: string
+                  description: >-
+                    (Optional) List of tool names that always require approval
+                never:
+                  type: array
+                  items:
+                    type: string
+                  description: >-
+                    (Optional) List of tool names that never require approval
+              additionalProperties: false
+              title: ApprovalFilter
+              description: >-
+                Filter configuration for MCP tool approval requirements.
+          default: never
+          description: >-
+            Approval requirement for tool calls ("always", "never", or filter)
+        allowed_tools:
+          oneOf:
+            - type: array
+              items:
+                type: string
+            - type: object
+              properties:
+                tool_names:
+                  type: array
+                  items:
+                    type: string
+                  description: >-
+                    (Optional) List of specific tool names that are allowed
+              additionalProperties: false
+              title: AllowedToolsFilter
+              description: >-
+                Filter configuration for restricting which MCP tools can be used.
+          description: >-
+            (Optional) Restriction on which tools can be used from this server
+      additionalProperties: false
+      required:
+        - type
+        - server_label
+        - server_url
+        - require_approval
+      title: OpenAIResponseInputToolMCP
+      description: >-
+        Model Context Protocol (MCP) tool configuration for OpenAI response inputs.
+    CreateOpenaiResponseRequest:
+      type: object
+      properties:
+        input:
+          oneOf:
+            - type: string
+            - type: array
+              items:
+                $ref: '#/components/schemas/OpenAIResponseInput'
+          description: Input message(s) to create the response.
+        model:
+          type: string
+          description: The underlying LLM used for completions.
+        instructions:
+          type: string
+        previous_response_id:
+          type: string
+          description: >-
+            (Optional) If specified, the new response will be a continuation of the
+            previous response. This can be used to easily fork off new responses from
+            existing responses.
+        conversation:
+          type: string
+          description: >-
+            (Optional) The ID of a conversation to add the response to. Must begin
+            with 'conv_'. Input and output messages will be automatically added to
+            the conversation.
+ store: + type: boolean + stream: + type: boolean + temperature: + type: number + text: + $ref: '#/components/schemas/OpenAIResponseText' + tools: + type: array + items: + $ref: '#/components/schemas/OpenAIResponseInputTool' + include: + type: array + items: + type: string + description: >- + (Optional) Additional fields to include in the response. + max_infer_iters: + type: integer + additionalProperties: false + required: + - input + - model + title: CreateOpenaiResponseRequest + OpenAIResponseObject: + type: object + properties: + created_at: + type: integer + description: >- + Unix timestamp when the response was created + error: + $ref: '#/components/schemas/OpenAIResponseError' + description: >- + (Optional) Error details if the response generation failed + id: + type: string + description: Unique identifier for this response + model: + type: string + description: Model identifier used for generation + object: + type: string + const: response + default: response + description: >- + Object type identifier, always "response" + output: + type: array + items: + $ref: '#/components/schemas/OpenAIResponseOutput' + description: >- + List of generated output items (messages, tool calls, etc.) + parallel_tool_calls: + type: boolean + default: false + description: >- + Whether tool calls can be executed in parallel + previous_response_id: + type: string + description: >- + (Optional) ID of the previous response in a conversation + status: + type: string + description: >- + Current status of the response generation + temperature: + type: number + description: >- + (Optional) Sampling temperature used for generation + text: + $ref: '#/components/schemas/OpenAIResponseText' + description: >- + Text formatting configuration for the response + top_p: + type: number + description: >- + (Optional) Nucleus sampling parameter used for generation + tools: + type: array + items: + $ref: '#/components/schemas/OpenAIResponseTool' + description: >- + (Optional) An array of tools the model may call while generating a response. + truncation: + type: string + description: >- + (Optional) Truncation strategy applied to the response + usage: + $ref: '#/components/schemas/OpenAIResponseUsage' + description: >- + (Optional) Token usage information for the response + instructions: + type: string + description: >- + (Optional) System message inserted into the model's context + additionalProperties: false + required: + - created_at + - id + - model + - object + - output + - parallel_tool_calls + - status + - text + title: OpenAIResponseObject + description: >- + Complete OpenAI response object containing generation results and metadata. + OpenAIResponseContentPartOutputText: + type: object + properties: + type: + type: string + const: output_text + default: output_text + description: >- + Content part type identifier, always "output_text" + text: + type: string + description: Text emitted for this content part + annotations: + type: array + items: + $ref: '#/components/schemas/OpenAIResponseAnnotations' + description: >- + Structured annotations associated with the text + logprobs: + type: array + items: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: (Optional) Token log probability details + additionalProperties: false + required: + - type + - text + - annotations + title: OpenAIResponseContentPartOutputText + description: >- + Text content within a streamed response part. 
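+    # Illustrative CreateOpenaiResponseRequest body (model id and MCP server
+    # are hypothetical):
+    #   {"model": "example-llm", "input": "Summarize the attached notes",
+    #    "tools": [{"type": "mcp", "server_label": "notes",
+    #               "server_url": "https://mcp.example.com", "require_approval": "never"}],
+    #    "stream": true, "store": true}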
+ "OpenAIResponseContentPartReasoningSummary": + type: object + properties: + type: + type: string + const: summary_text + default: summary_text + description: >- + Content part type identifier, always "summary_text" + text: + type: string + description: Summary text + additionalProperties: false + required: + - type + - text + title: >- + OpenAIResponseContentPartReasoningSummary + description: >- + Reasoning summary part in a streamed response. + OpenAIResponseContentPartReasoningText: + type: object + properties: + type: + type: string + const: reasoning_text + default: reasoning_text + description: >- + Content part type identifier, always "reasoning_text" + text: + type: string + description: Reasoning text supplied by the model + additionalProperties: false + required: + - type + - text + title: OpenAIResponseContentPartReasoningText + description: >- + Reasoning text emitted as part of a streamed response. + OpenAIResponseObjectStream: + oneOf: + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseCreated' + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseInProgress' + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputItemAdded' + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputItemDone' + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextDelta' + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextDone' + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta' + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone' + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallInProgress' + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallSearching' + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallCompleted' + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsInProgress' + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsFailed' + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsCompleted' + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta' + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallArgumentsDone' + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallInProgress' + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallFailed' + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallCompleted' + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseContentPartAdded' + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseContentPartDone' + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningTextDelta' + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningTextDone' + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningSummaryPartAdded' + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningSummaryPartDone' + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningSummaryTextDelta' + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningSummaryTextDone' + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseRefusalDelta' + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseRefusalDone' + - $ref: 
'#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextAnnotationAdded' + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseFileSearchCallInProgress' + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseFileSearchCallSearching' + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseFileSearchCallCompleted' + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseIncomplete' + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseFailed' + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseCompleted' + discriminator: + propertyName: type + mapping: + response.created: '#/components/schemas/OpenAIResponseObjectStreamResponseCreated' + response.in_progress: '#/components/schemas/OpenAIResponseObjectStreamResponseInProgress' + response.output_item.added: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputItemAdded' + response.output_item.done: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputItemDone' + response.output_text.delta: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextDelta' + response.output_text.done: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextDone' + response.function_call_arguments.delta: '#/components/schemas/OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta' + response.function_call_arguments.done: '#/components/schemas/OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone' + response.web_search_call.in_progress: '#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallInProgress' + response.web_search_call.searching: '#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallSearching' + response.web_search_call.completed: '#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallCompleted' + response.mcp_list_tools.in_progress: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsInProgress' + response.mcp_list_tools.failed: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsFailed' + response.mcp_list_tools.completed: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsCompleted' + response.mcp_call.arguments.delta: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta' + response.mcp_call.arguments.done: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallArgumentsDone' + response.mcp_call.in_progress: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallInProgress' + response.mcp_call.failed: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallFailed' + response.mcp_call.completed: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallCompleted' + response.content_part.added: '#/components/schemas/OpenAIResponseObjectStreamResponseContentPartAdded' + response.content_part.done: '#/components/schemas/OpenAIResponseObjectStreamResponseContentPartDone' + response.reasoning_text.delta: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningTextDelta' + response.reasoning_text.done: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningTextDone' + response.reasoning_summary_part.added: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningSummaryPartAdded' + response.reasoning_summary_part.done: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningSummaryPartDone' + response.reasoning_summary_text.delta: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningSummaryTextDelta' + response.reasoning_summary_text.done: 
'#/components/schemas/OpenAIResponseObjectStreamResponseReasoningSummaryTextDone' + response.refusal.delta: '#/components/schemas/OpenAIResponseObjectStreamResponseRefusalDelta' + response.refusal.done: '#/components/schemas/OpenAIResponseObjectStreamResponseRefusalDone' + response.output_text.annotation.added: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextAnnotationAdded' + response.file_search_call.in_progress: '#/components/schemas/OpenAIResponseObjectStreamResponseFileSearchCallInProgress' + response.file_search_call.searching: '#/components/schemas/OpenAIResponseObjectStreamResponseFileSearchCallSearching' + response.file_search_call.completed: '#/components/schemas/OpenAIResponseObjectStreamResponseFileSearchCallCompleted' + response.incomplete: '#/components/schemas/OpenAIResponseObjectStreamResponseIncomplete' + response.failed: '#/components/schemas/OpenAIResponseObjectStreamResponseFailed' + response.completed: '#/components/schemas/OpenAIResponseObjectStreamResponseCompleted' + "OpenAIResponseObjectStreamResponseCompleted": + type: object + properties: + response: + $ref: '#/components/schemas/OpenAIResponseObject' + description: Completed response object + type: + type: string + const: response.completed + default: response.completed + description: >- + Event type identifier, always "response.completed" + additionalProperties: false + required: + - response + - type + title: >- + OpenAIResponseObjectStreamResponseCompleted + description: >- + Streaming event indicating a response has been completed. + "OpenAIResponseObjectStreamResponseContentPartAdded": + type: object + properties: + content_index: + type: integer + description: >- + Index position of the part within the content array + response_id: + type: string + description: >- + Unique identifier of the response containing this content + item_id: + type: string + description: >- + Unique identifier of the output item containing this content part + output_index: + type: integer + description: >- + Index position of the output item in the response + part: + oneOf: + - $ref: '#/components/schemas/OpenAIResponseContentPartOutputText' + - $ref: '#/components/schemas/OpenAIResponseContentPartRefusal' + - $ref: '#/components/schemas/OpenAIResponseContentPartReasoningText' + discriminator: + propertyName: type + mapping: + output_text: '#/components/schemas/OpenAIResponseContentPartOutputText' + refusal: '#/components/schemas/OpenAIResponseContentPartRefusal' + reasoning_text: '#/components/schemas/OpenAIResponseContentPartReasoningText' + description: The content part that was added + sequence_number: + type: integer + description: >- + Sequential number for ordering streaming events + type: + type: string + const: response.content_part.added + default: response.content_part.added + description: >- + Event type identifier, always "response.content_part.added" + additionalProperties: false + required: + - content_index + - response_id + - item_id + - output_index + - part + - sequence_number + - type + title: >- + OpenAIResponseObjectStreamResponseContentPartAdded + description: >- + Streaming event for when a new content part is added to a response item. 
+ "OpenAIResponseObjectStreamResponseContentPartDone": + type: object + properties: + content_index: + type: integer + description: >- + Index position of the part within the content array + response_id: + type: string + description: >- + Unique identifier of the response containing this content + item_id: + type: string + description: >- + Unique identifier of the output item containing this content part + output_index: + type: integer + description: >- + Index position of the output item in the response + part: + oneOf: + - $ref: '#/components/schemas/OpenAIResponseContentPartOutputText' + - $ref: '#/components/schemas/OpenAIResponseContentPartRefusal' + - $ref: '#/components/schemas/OpenAIResponseContentPartReasoningText' + discriminator: + propertyName: type + mapping: + output_text: '#/components/schemas/OpenAIResponseContentPartOutputText' + refusal: '#/components/schemas/OpenAIResponseContentPartRefusal' + reasoning_text: '#/components/schemas/OpenAIResponseContentPartReasoningText' + description: The completed content part + sequence_number: + type: integer + description: >- + Sequential number for ordering streaming events + type: + type: string + const: response.content_part.done + default: response.content_part.done + description: >- + Event type identifier, always "response.content_part.done" + additionalProperties: false + required: + - content_index + - response_id + - item_id + - output_index + - part + - sequence_number + - type + title: >- + OpenAIResponseObjectStreamResponseContentPartDone + description: >- + Streaming event for when a content part is completed. + "OpenAIResponseObjectStreamResponseCreated": + type: object + properties: + response: + $ref: '#/components/schemas/OpenAIResponseObject' + description: The response object that was created + type: + type: string + const: response.created + default: response.created + description: >- + Event type identifier, always "response.created" + additionalProperties: false + required: + - response + - type + title: >- + OpenAIResponseObjectStreamResponseCreated + description: >- + Streaming event indicating a new response has been created. + OpenAIResponseObjectStreamResponseFailed: + type: object + properties: + response: + $ref: '#/components/schemas/OpenAIResponseObject' + description: Response object describing the failure + sequence_number: + type: integer + description: >- + Sequential number for ordering streaming events + type: + type: string + const: response.failed + default: response.failed + description: >- + Event type identifier, always "response.failed" + additionalProperties: false + required: + - response + - sequence_number + - type + title: OpenAIResponseObjectStreamResponseFailed + description: >- + Streaming event emitted when a response fails. 
+ "OpenAIResponseObjectStreamResponseFileSearchCallCompleted": + type: object + properties: + item_id: + type: string + description: >- + Unique identifier of the completed file search call + output_index: + type: integer + description: >- + Index position of the item in the output list + sequence_number: + type: integer + description: >- + Sequential number for ordering streaming events + type: + type: string + const: response.file_search_call.completed + default: response.file_search_call.completed + description: >- + Event type identifier, always "response.file_search_call.completed" + additionalProperties: false + required: + - item_id + - output_index + - sequence_number + - type + title: >- + OpenAIResponseObjectStreamResponseFileSearchCallCompleted + description: >- + Streaming event for completed file search calls. + "OpenAIResponseObjectStreamResponseFileSearchCallInProgress": + type: object + properties: + item_id: + type: string + description: >- + Unique identifier of the file search call + output_index: + type: integer + description: >- + Index position of the item in the output list + sequence_number: + type: integer + description: >- + Sequential number for ordering streaming events + type: + type: string + const: response.file_search_call.in_progress + default: response.file_search_call.in_progress + description: >- + Event type identifier, always "response.file_search_call.in_progress" + additionalProperties: false + required: + - item_id + - output_index + - sequence_number + - type + title: >- + OpenAIResponseObjectStreamResponseFileSearchCallInProgress + description: >- + Streaming event for file search calls in progress. + "OpenAIResponseObjectStreamResponseFileSearchCallSearching": + type: object + properties: + item_id: + type: string + description: >- + Unique identifier of the file search call + output_index: + type: integer + description: >- + Index position of the item in the output list + sequence_number: + type: integer + description: >- + Sequential number for ordering streaming events + type: + type: string + const: response.file_search_call.searching + default: response.file_search_call.searching + description: >- + Event type identifier, always "response.file_search_call.searching" + additionalProperties: false + required: + - item_id + - output_index + - sequence_number + - type + title: >- + OpenAIResponseObjectStreamResponseFileSearchCallSearching + description: >- + Streaming event for file search currently searching. + "OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta": + type: object + properties: + delta: + type: string + description: >- + Incremental function call arguments being added + item_id: + type: string + description: >- + Unique identifier of the function call being updated + output_index: + type: integer + description: >- + Index position of the item in the output list + sequence_number: + type: integer + description: >- + Sequential number for ordering streaming events + type: + type: string + const: response.function_call_arguments.delta + default: response.function_call_arguments.delta + description: >- + Event type identifier, always "response.function_call_arguments.delta" + additionalProperties: false + required: + - delta + - item_id + - output_index + - sequence_number + - type + title: >- + OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta + description: >- + Streaming event for incremental function call argument updates. 
+ "OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone": + type: object + properties: + arguments: + type: string + description: >- + Final complete arguments JSON string for the function call + item_id: + type: string + description: >- + Unique identifier of the completed function call + output_index: + type: integer + description: >- + Index position of the item in the output list + sequence_number: + type: integer + description: >- + Sequential number for ordering streaming events + type: + type: string + const: response.function_call_arguments.done + default: response.function_call_arguments.done + description: >- + Event type identifier, always "response.function_call_arguments.done" + additionalProperties: false + required: + - arguments + - item_id + - output_index + - sequence_number + - type + title: >- + OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone + description: >- + Streaming event for when function call arguments are completed. + "OpenAIResponseObjectStreamResponseInProgress": + type: object + properties: + response: + $ref: '#/components/schemas/OpenAIResponseObject' + description: Current response state while in progress + sequence_number: + type: integer + description: >- + Sequential number for ordering streaming events + type: + type: string + const: response.in_progress + default: response.in_progress + description: >- + Event type identifier, always "response.in_progress" + additionalProperties: false + required: + - response + - sequence_number + - type + title: >- + OpenAIResponseObjectStreamResponseInProgress + description: >- + Streaming event indicating the response remains in progress. + "OpenAIResponseObjectStreamResponseIncomplete": + type: object + properties: + response: + $ref: '#/components/schemas/OpenAIResponseObject' + description: >- + Response object describing the incomplete state + sequence_number: + type: integer + description: >- + Sequential number for ordering streaming events + type: + type: string + const: response.incomplete + default: response.incomplete + description: >- + Event type identifier, always "response.incomplete" + additionalProperties: false + required: + - response + - sequence_number + - type + title: >- + OpenAIResponseObjectStreamResponseIncomplete + description: >- + Streaming event emitted when a response ends in an incomplete state. 
+ "OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta": + type: object + properties: + delta: + type: string + item_id: + type: string + output_index: + type: integer + sequence_number: + type: integer + type: + type: string + const: response.mcp_call.arguments.delta + default: response.mcp_call.arguments.delta + additionalProperties: false + required: + - delta + - item_id + - output_index + - sequence_number + - type + title: >- + OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta + "OpenAIResponseObjectStreamResponseMcpCallArgumentsDone": + type: object + properties: + arguments: + type: string + item_id: + type: string + output_index: + type: integer + sequence_number: + type: integer + type: + type: string + const: response.mcp_call.arguments.done + default: response.mcp_call.arguments.done + additionalProperties: false + required: + - arguments + - item_id + - output_index + - sequence_number + - type + title: >- + OpenAIResponseObjectStreamResponseMcpCallArgumentsDone + "OpenAIResponseObjectStreamResponseMcpCallCompleted": + type: object + properties: + sequence_number: + type: integer + description: >- + Sequential number for ordering streaming events + type: + type: string + const: response.mcp_call.completed + default: response.mcp_call.completed + description: >- + Event type identifier, always "response.mcp_call.completed" + additionalProperties: false + required: + - sequence_number + - type + title: >- + OpenAIResponseObjectStreamResponseMcpCallCompleted + description: Streaming event for completed MCP calls. + "OpenAIResponseObjectStreamResponseMcpCallFailed": + type: object + properties: + sequence_number: + type: integer + description: >- + Sequential number for ordering streaming events + type: + type: string + const: response.mcp_call.failed + default: response.mcp_call.failed + description: >- + Event type identifier, always "response.mcp_call.failed" + additionalProperties: false + required: + - sequence_number + - type + title: >- + OpenAIResponseObjectStreamResponseMcpCallFailed + description: Streaming event for failed MCP calls. + "OpenAIResponseObjectStreamResponseMcpCallInProgress": + type: object + properties: + item_id: + type: string + description: Unique identifier of the MCP call + output_index: + type: integer + description: >- + Index position of the item in the output list + sequence_number: + type: integer + description: >- + Sequential number for ordering streaming events + type: + type: string + const: response.mcp_call.in_progress + default: response.mcp_call.in_progress + description: >- + Event type identifier, always "response.mcp_call.in_progress" + additionalProperties: false + required: + - item_id + - output_index + - sequence_number + - type + title: >- + OpenAIResponseObjectStreamResponseMcpCallInProgress + description: >- + Streaming event for MCP calls in progress. 
+ "OpenAIResponseObjectStreamResponseMcpListToolsCompleted": + type: object + properties: + sequence_number: + type: integer + type: + type: string + const: response.mcp_list_tools.completed + default: response.mcp_list_tools.completed + additionalProperties: false + required: + - sequence_number + - type + title: >- + OpenAIResponseObjectStreamResponseMcpListToolsCompleted + "OpenAIResponseObjectStreamResponseMcpListToolsFailed": + type: object + properties: + sequence_number: + type: integer + type: + type: string + const: response.mcp_list_tools.failed + default: response.mcp_list_tools.failed + additionalProperties: false + required: + - sequence_number + - type + title: >- + OpenAIResponseObjectStreamResponseMcpListToolsFailed + "OpenAIResponseObjectStreamResponseMcpListToolsInProgress": + type: object + properties: + sequence_number: + type: integer + type: + type: string + const: response.mcp_list_tools.in_progress + default: response.mcp_list_tools.in_progress + additionalProperties: false + required: + - sequence_number + - type + title: >- + OpenAIResponseObjectStreamResponseMcpListToolsInProgress + "OpenAIResponseObjectStreamResponseOutputItemAdded": + type: object + properties: + response_id: + type: string + description: >- + Unique identifier of the response containing this output + item: + oneOf: + - $ref: '#/components/schemas/OpenAIResponseMessage' + - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + discriminator: + propertyName: type + mapping: + message: '#/components/schemas/OpenAIResponseMessage' + web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + description: >- + The output item that was added (message, tool call, etc.) + output_index: + type: integer + description: >- + Index position of this item in the output list + sequence_number: + type: integer + description: >- + Sequential number for ordering streaming events + type: + type: string + const: response.output_item.added + default: response.output_item.added + description: >- + Event type identifier, always "response.output_item.added" + additionalProperties: false + required: + - response_id + - item + - output_index + - sequence_number + - type + title: >- + OpenAIResponseObjectStreamResponseOutputItemAdded + description: >- + Streaming event for when a new output item is added to the response. 
+ "OpenAIResponseObjectStreamResponseOutputItemDone": + type: object + properties: + response_id: + type: string + description: >- + Unique identifier of the response containing this output + item: + oneOf: + - $ref: '#/components/schemas/OpenAIResponseMessage' + - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + discriminator: + propertyName: type + mapping: + message: '#/components/schemas/OpenAIResponseMessage' + web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + description: >- + The completed output item (message, tool call, etc.) + output_index: + type: integer + description: >- + Index position of this item in the output list + sequence_number: + type: integer + description: >- + Sequential number for ordering streaming events + type: + type: string + const: response.output_item.done + default: response.output_item.done + description: >- + Event type identifier, always "response.output_item.done" + additionalProperties: false + required: + - response_id + - item + - output_index + - sequence_number + - type + title: >- + OpenAIResponseObjectStreamResponseOutputItemDone + description: >- + Streaming event for when an output item is completed. 
+ "OpenAIResponseObjectStreamResponseOutputTextAnnotationAdded": + type: object + properties: + item_id: + type: string + description: >- + Unique identifier of the item to which the annotation is being added + output_index: + type: integer + description: >- + Index position of the output item in the response's output array + content_index: + type: integer + description: >- + Index position of the content part within the output item + annotation_index: + type: integer + description: >- + Index of the annotation within the content part + annotation: + oneOf: + - $ref: '#/components/schemas/OpenAIResponseAnnotationFileCitation' + - $ref: '#/components/schemas/OpenAIResponseAnnotationCitation' + - $ref: '#/components/schemas/OpenAIResponseAnnotationContainerFileCitation' + - $ref: '#/components/schemas/OpenAIResponseAnnotationFilePath' + discriminator: + propertyName: type + mapping: + file_citation: '#/components/schemas/OpenAIResponseAnnotationFileCitation' + url_citation: '#/components/schemas/OpenAIResponseAnnotationCitation' + container_file_citation: '#/components/schemas/OpenAIResponseAnnotationContainerFileCitation' + file_path: '#/components/schemas/OpenAIResponseAnnotationFilePath' + description: The annotation object being added + sequence_number: + type: integer + description: >- + Sequential number for ordering streaming events + type: + type: string + const: response.output_text.annotation.added + default: response.output_text.annotation.added + description: >- + Event type identifier, always "response.output_text.annotation.added" + additionalProperties: false + required: + - item_id + - output_index + - content_index + - annotation_index + - annotation + - sequence_number + - type + title: >- + OpenAIResponseObjectStreamResponseOutputTextAnnotationAdded + description: >- + Streaming event for when an annotation is added to output text. + "OpenAIResponseObjectStreamResponseOutputTextDelta": + type: object + properties: + content_index: + type: integer + description: Index position within the text content + delta: + type: string + description: Incremental text content being added + item_id: + type: string + description: >- + Unique identifier of the output item being updated + output_index: + type: integer + description: >- + Index position of the item in the output list + sequence_number: + type: integer + description: >- + Sequential number for ordering streaming events + type: + type: string + const: response.output_text.delta + default: response.output_text.delta + description: >- + Event type identifier, always "response.output_text.delta" + additionalProperties: false + required: + - content_index + - delta + - item_id + - output_index + - sequence_number + - type + title: >- + OpenAIResponseObjectStreamResponseOutputTextDelta + description: >- + Streaming event for incremental text content updates. 
+ "OpenAIResponseObjectStreamResponseOutputTextDone": + type: object + properties: + content_index: + type: integer + description: Index position within the text content + text: + type: string + description: >- + Final complete text content of the output item + item_id: + type: string + description: >- + Unique identifier of the completed output item + output_index: + type: integer + description: >- + Index position of the item in the output list + sequence_number: + type: integer + description: >- + Sequential number for ordering streaming events + type: + type: string + const: response.output_text.done + default: response.output_text.done + description: >- + Event type identifier, always "response.output_text.done" + additionalProperties: false + required: + - content_index + - text + - item_id + - output_index + - sequence_number + - type + title: >- + OpenAIResponseObjectStreamResponseOutputTextDone + description: >- + Streaming event for when text output is completed. + "OpenAIResponseObjectStreamResponseReasoningSummaryPartAdded": + type: object + properties: + item_id: + type: string + description: Unique identifier of the output item + output_index: + type: integer + description: Index position of the output item + part: + $ref: '#/components/schemas/OpenAIResponseContentPartReasoningSummary' + description: The summary part that was added + sequence_number: + type: integer + description: >- + Sequential number for ordering streaming events + summary_index: + type: integer + description: >- + Index of the summary part within the reasoning summary + type: + type: string + const: response.reasoning_summary_part.added + default: response.reasoning_summary_part.added + description: >- + Event type identifier, always "response.reasoning_summary_part.added" + additionalProperties: false + required: + - item_id + - output_index + - part + - sequence_number + - summary_index + - type + title: >- + OpenAIResponseObjectStreamResponseReasoningSummaryPartAdded + description: >- + Streaming event for when a new reasoning summary part is added. + "OpenAIResponseObjectStreamResponseReasoningSummaryPartDone": + type: object + properties: + item_id: + type: string + description: Unique identifier of the output item + output_index: + type: integer + description: Index position of the output item + part: + $ref: '#/components/schemas/OpenAIResponseContentPartReasoningSummary' + description: The completed summary part + sequence_number: + type: integer + description: >- + Sequential number for ordering streaming events + summary_index: + type: integer + description: >- + Index of the summary part within the reasoning summary + type: + type: string + const: response.reasoning_summary_part.done + default: response.reasoning_summary_part.done + description: >- + Event type identifier, always "response.reasoning_summary_part.done" + additionalProperties: false + required: + - item_id + - output_index + - part + - sequence_number + - summary_index + - type + title: >- + OpenAIResponseObjectStreamResponseReasoningSummaryPartDone + description: >- + Streaming event for when a reasoning summary part is completed. 
+ "OpenAIResponseObjectStreamResponseReasoningSummaryTextDelta": + type: object + properties: + delta: + type: string + description: Incremental summary text being added + item_id: + type: string + description: Unique identifier of the output item + output_index: + type: integer + description: Index position of the output item + sequence_number: + type: integer + description: >- + Sequential number for ordering streaming events + summary_index: + type: integer + description: >- + Index of the summary part within the reasoning summary + type: + type: string + const: response.reasoning_summary_text.delta + default: response.reasoning_summary_text.delta + description: >- + Event type identifier, always "response.reasoning_summary_text.delta" + additionalProperties: false + required: + - delta + - item_id + - output_index + - sequence_number + - summary_index + - type + title: >- + OpenAIResponseObjectStreamResponseReasoningSummaryTextDelta + description: >- + Streaming event for incremental reasoning summary text updates. + "OpenAIResponseObjectStreamResponseReasoningSummaryTextDone": + type: object + properties: + text: + type: string + description: Final complete summary text + item_id: + type: string + description: Unique identifier of the output item + output_index: + type: integer + description: Index position of the output item + sequence_number: + type: integer + description: >- + Sequential number for ordering streaming events + summary_index: + type: integer + description: >- + Index of the summary part within the reasoning summary + type: + type: string + const: response.reasoning_summary_text.done + default: response.reasoning_summary_text.done + description: >- + Event type identifier, always "response.reasoning_summary_text.done" + additionalProperties: false + required: + - text + - item_id + - output_index + - sequence_number + - summary_index + - type + title: >- + OpenAIResponseObjectStreamResponseReasoningSummaryTextDone + description: >- + Streaming event for when reasoning summary text is completed. + "OpenAIResponseObjectStreamResponseReasoningTextDelta": + type: object + properties: + content_index: + type: integer + description: >- + Index position of the reasoning content part + delta: + type: string + description: Incremental reasoning text being added + item_id: + type: string + description: >- + Unique identifier of the output item being updated + output_index: + type: integer + description: >- + Index position of the item in the output list + sequence_number: + type: integer + description: >- + Sequential number for ordering streaming events + type: + type: string + const: response.reasoning_text.delta + default: response.reasoning_text.delta + description: >- + Event type identifier, always "response.reasoning_text.delta" + additionalProperties: false + required: + - content_index + - delta + - item_id + - output_index + - sequence_number + - type + title: >- + OpenAIResponseObjectStreamResponseReasoningTextDelta + description: >- + Streaming event for incremental reasoning text updates. 
+ "OpenAIResponseObjectStreamResponseReasoningTextDone": + type: object + properties: + content_index: + type: integer + description: >- + Index position of the reasoning content part + text: + type: string + description: Final complete reasoning text + item_id: + type: string + description: >- + Unique identifier of the completed output item + output_index: + type: integer + description: >- + Index position of the item in the output list + sequence_number: + type: integer + description: >- + Sequential number for ordering streaming events + type: + type: string + const: response.reasoning_text.done + default: response.reasoning_text.done + description: >- + Event type identifier, always "response.reasoning_text.done" + additionalProperties: false + required: + - content_index + - text + - item_id + - output_index + - sequence_number + - type + title: >- + OpenAIResponseObjectStreamResponseReasoningTextDone + description: >- + Streaming event for when reasoning text is completed. + "OpenAIResponseObjectStreamResponseRefusalDelta": + type: object + properties: + content_index: + type: integer + description: Index position of the content part + delta: + type: string + description: Incremental refusal text being added + item_id: + type: string + description: Unique identifier of the output item + output_index: + type: integer + description: >- + Index position of the item in the output list + sequence_number: + type: integer + description: >- + Sequential number for ordering streaming events + type: + type: string + const: response.refusal.delta + default: response.refusal.delta + description: >- + Event type identifier, always "response.refusal.delta" + additionalProperties: false + required: + - content_index + - delta + - item_id + - output_index + - sequence_number + - type + title: >- + OpenAIResponseObjectStreamResponseRefusalDelta + description: >- + Streaming event for incremental refusal text updates. + "OpenAIResponseObjectStreamResponseRefusalDone": + type: object + properties: + content_index: + type: integer + description: Index position of the content part + refusal: + type: string + description: Final complete refusal text + item_id: + type: string + description: Unique identifier of the output item + output_index: + type: integer + description: >- + Index position of the item in the output list + sequence_number: + type: integer + description: >- + Sequential number for ordering streaming events + type: + type: string + const: response.refusal.done + default: response.refusal.done + description: >- + Event type identifier, always "response.refusal.done" + additionalProperties: false + required: + - content_index + - refusal + - item_id + - output_index + - sequence_number + - type + title: >- + OpenAIResponseObjectStreamResponseRefusalDone + description: >- + Streaming event for when refusal text is completed. 
+ "OpenAIResponseObjectStreamResponseWebSearchCallCompleted": + type: object + properties: + item_id: + type: string + description: >- + Unique identifier of the completed web search call + output_index: + type: integer + description: >- + Index position of the item in the output list + sequence_number: + type: integer + description: >- + Sequential number for ordering streaming events + type: + type: string + const: response.web_search_call.completed + default: response.web_search_call.completed + description: >- + Event type identifier, always "response.web_search_call.completed" + additionalProperties: false + required: + - item_id + - output_index + - sequence_number + - type + title: >- + OpenAIResponseObjectStreamResponseWebSearchCallCompleted + description: >- + Streaming event for completed web search calls. + "OpenAIResponseObjectStreamResponseWebSearchCallInProgress": + type: object + properties: + item_id: + type: string + description: Unique identifier of the web search call + output_index: + type: integer + description: >- + Index position of the item in the output list + sequence_number: + type: integer + description: >- + Sequential number for ordering streaming events + type: + type: string + const: response.web_search_call.in_progress + default: response.web_search_call.in_progress + description: >- + Event type identifier, always "response.web_search_call.in_progress" + additionalProperties: false + required: + - item_id + - output_index + - sequence_number + - type + title: >- + OpenAIResponseObjectStreamResponseWebSearchCallInProgress + description: >- + Streaming event for web search calls in progress. + "OpenAIResponseObjectStreamResponseWebSearchCallSearching": + type: object + properties: + item_id: + type: string + output_index: + type: integer + sequence_number: + type: integer + type: + type: string + const: response.web_search_call.searching + default: response.web_search_call.searching + additionalProperties: false + required: + - item_id + - output_index + - sequence_number + - type + title: >- + OpenAIResponseObjectStreamResponseWebSearchCallSearching + OpenAIDeleteResponseObject: + type: object + properties: + id: + type: string + description: >- + Unique identifier of the deleted response + object: + type: string + const: response + default: response + description: >- + Object type identifier, always "response" + deleted: + type: boolean + default: true + description: Deletion confirmation flag, always True + additionalProperties: false + required: + - id + - object + - deleted + title: OpenAIDeleteResponseObject + description: >- + Response object confirming deletion of an OpenAI response. + ListOpenAIResponseInputItem: + type: object + properties: + data: + type: array + items: + $ref: '#/components/schemas/OpenAIResponseInput' + description: List of input items + object: + type: string + const: list + default: list + description: Object type identifier, always "list" + additionalProperties: false + required: + - data + - object + title: ListOpenAIResponseInputItem + description: >- + List container for OpenAI response input items. + RunShieldRequest: + type: object + properties: + shield_id: + type: string + description: The identifier of the shield to run. + messages: + type: array + items: + $ref: '#/components/schemas/OpenAIMessageParam' + description: The messages to run the shield on. 
+ params: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: The parameters of the shield. + additionalProperties: false + required: + - shield_id + - messages + - params + title: RunShieldRequest + RunShieldResponse: + type: object + properties: + violation: + $ref: '#/components/schemas/SafetyViolation' + description: >- + (Optional) Safety violation detected by the shield, if any + additionalProperties: false + title: RunShieldResponse + description: Response from running a safety shield. + SafetyViolation: + type: object + properties: + violation_level: + $ref: '#/components/schemas/ViolationLevel' + description: Severity level of the violation + user_message: + type: string + description: >- + (Optional) Message to convey to the user about the violation + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + Additional metadata including specific violation codes for debugging and + telemetry + additionalProperties: false + required: + - violation_level + - metadata + title: SafetyViolation + description: >- + Details of a safety violation detected by content moderation. + ViolationLevel: + type: string + enum: + - info + - warn + - error + title: ViolationLevel + description: Severity level of a safety violation. + AgentTurnInputType: + type: object + properties: + type: + type: string + const: agent_turn_input + default: agent_turn_input + description: >- + Discriminator type. Always "agent_turn_input" + additionalProperties: false + required: + - type + title: AgentTurnInputType + description: Parameter type for agent turn input. + AggregationFunctionType: + type: string + enum: + - average + - weighted_average + - median + - categorical_count + - accuracy + title: AggregationFunctionType + description: >- + Types of aggregation functions for scoring results. + ArrayType: + type: object + properties: + type: + type: string + const: array + default: array + description: Discriminator type. Always "array" + additionalProperties: false + required: + - type + title: ArrayType + description: Parameter type for array values. + BasicScoringFnParams: + type: object + properties: + type: + $ref: '#/components/schemas/ScoringFnParamsType' + const: basic + default: basic + description: >- + The type of scoring function parameters, always basic + aggregation_functions: + type: array + items: + $ref: '#/components/schemas/AggregationFunctionType' + description: >- + Aggregation functions to apply to the scores of each row + additionalProperties: false + required: + - type + - aggregation_functions + title: BasicScoringFnParams + description: >- + Parameters for basic scoring function configuration. + BooleanType: + type: object + properties: + type: + type: string + const: boolean + default: boolean + description: Discriminator type. Always "boolean" + additionalProperties: false + required: + - type + title: BooleanType + description: Parameter type for boolean values. + ChatCompletionInputType: + type: object + properties: + type: + type: string + const: chat_completion_input + default: chat_completion_input + description: >- + Discriminator type. Always "chat_completion_input" + additionalProperties: false + required: + - type + title: ChatCompletionInputType + description: >- + Parameter type for chat completion input. 
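Looping back to the safety schemas above: `RunShieldRequest` requires all three of `shield_id`, `messages`, and `params` (an empty object is fine), while `RunShieldResponse.violation` is optional, so its absence is the pass signal. A sketch of the round trip; the endpoint path, port, and shield id are assumptions for illustration, and the message items follow the OpenAI chat message shape referenced by `OpenAIMessageParam`:

```python
import json
import urllib.request

# A RunShieldRequest body exactly as the schema requires: shield_id,
# messages, and params are all mandatory (params may be empty).
body = {
    "shield_id": "llama-guard",  # hypothetical registered shield id
    "messages": [{"role": "user", "content": "How do I pick a lock?"}],
    "params": {},
}

# The route below is an assumption; consult the spec's paths section.
req = urllib.request.Request(
    "http://localhost:8321/v1/safety/run-shield",
    data=json.dumps(body).encode(),
    headers={"Content-Type": "application/json"},
)
with urllib.request.urlopen(req) as resp:
    result = json.load(resp)

# RunShieldResponse: `violation` is optional -- absent means content passed.
violation = result.get("violation")
if violation is not None:
    print(violation["violation_level"], violation.get("user_message", ""))
```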
+ CompletionInputType: + type: object + properties: + type: + type: string + const: completion_input + default: completion_input + description: >- + Discriminator type. Always "completion_input" + additionalProperties: false + required: + - type + title: CompletionInputType + description: Parameter type for completion input. + JsonType: + type: object + properties: + type: + type: string + const: json + default: json + description: Discriminator type. Always "json" + additionalProperties: false + required: + - type + title: JsonType + description: Parameter type for JSON values. + LLMAsJudgeScoringFnParams: + type: object + properties: + type: + $ref: '#/components/schemas/ScoringFnParamsType' + const: llm_as_judge + default: llm_as_judge + description: >- + The type of scoring function parameters, always llm_as_judge + judge_model: + type: string + description: >- + Identifier of the LLM model to use as a judge for scoring + prompt_template: + type: string + description: >- + (Optional) Custom prompt template for the judge model + judge_score_regexes: + type: array + items: + type: string + description: >- + Regexes to extract the answer from generated response + aggregation_functions: + type: array + items: + $ref: '#/components/schemas/AggregationFunctionType' + description: >- + Aggregation functions to apply to the scores of each row + additionalProperties: false + required: + - type + - judge_model + - judge_score_regexes + - aggregation_functions + title: LLMAsJudgeScoringFnParams + description: >- + Parameters for LLM-as-judge scoring function configuration. + NumberType: + type: object + properties: + type: + type: string + const: number + default: number + description: Discriminator type. Always "number" + additionalProperties: false + required: + - type + title: NumberType + description: Parameter type for numeric values. + ObjectType: + type: object + properties: + type: + type: string + const: object + default: object + description: Discriminator type. Always "object" + additionalProperties: false + required: + - type + title: ObjectType + description: Parameter type for object values. + RegexParserScoringFnParams: + type: object + properties: + type: + $ref: '#/components/schemas/ScoringFnParamsType' + const: regex_parser + default: regex_parser + description: >- + The type of scoring function parameters, always regex_parser + parsing_regexes: + type: array + items: + type: string + description: >- + Regex to extract the answer from generated response + aggregation_functions: + type: array + items: + $ref: '#/components/schemas/AggregationFunctionType' + description: >- + Aggregation functions to apply to the scores of each row + additionalProperties: false + required: + - type + - parsing_regexes + - aggregation_functions + title: RegexParserScoringFnParams + description: >- + Parameters for regex parser scoring function configuration. 
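As a concrete reading of `LLMAsJudgeScoringFnParams`: only `prompt_template` is optional, and scores are extracted from the judge model's free-text reply via `judge_score_regexes`. A payload that validates against the schema above, with the model id and regex as illustrative values:

```python
llm_as_judge_params = {
    "type": "llm_as_judge",
    # Identifier of the judge model; illustrative value.
    "judge_model": "meta-llama/Llama-3.3-70B-Instruct",
    # Optional custom prompt for the judge.
    "prompt_template": "Rate the answer from 1 to 5. Reply as 'Score: N'.",
    # Regexes applied to the judge's reply to pull out the score.
    "judge_score_regexes": [r"Score:\s*(\d)"],
    # Row-level aggregation, from the AggregationFunctionType enum.
    "aggregation_functions": ["average"],
}
```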
+ ScoringFn: + type: object + properties: + identifier: + type: string + provider_resource_id: + type: string + provider_id: + type: string + type: + type: string + enum: + - model + - shield + - vector_store + - dataset + - scoring_function + - benchmark + - tool + - tool_group + - prompt + const: scoring_function + default: scoring_function + description: >- + The resource type, always scoring_function + description: + type: string + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + return_type: + oneOf: + - $ref: '#/components/schemas/StringType' + - $ref: '#/components/schemas/NumberType' + - $ref: '#/components/schemas/BooleanType' + - $ref: '#/components/schemas/ArrayType' + - $ref: '#/components/schemas/ObjectType' + - $ref: '#/components/schemas/JsonType' + - $ref: '#/components/schemas/UnionType' + - $ref: '#/components/schemas/ChatCompletionInputType' + - $ref: '#/components/schemas/CompletionInputType' + - $ref: '#/components/schemas/AgentTurnInputType' + discriminator: + propertyName: type + mapping: + string: '#/components/schemas/StringType' + number: '#/components/schemas/NumberType' + boolean: '#/components/schemas/BooleanType' + array: '#/components/schemas/ArrayType' + object: '#/components/schemas/ObjectType' + json: '#/components/schemas/JsonType' + union: '#/components/schemas/UnionType' + chat_completion_input: '#/components/schemas/ChatCompletionInputType' + completion_input: '#/components/schemas/CompletionInputType' + agent_turn_input: '#/components/schemas/AgentTurnInputType' + params: + $ref: '#/components/schemas/ScoringFnParams' + additionalProperties: false + required: + - identifier + - provider_id + - type + - metadata + - return_type + title: ScoringFn + description: >- + A scoring function resource for evaluating model outputs. + ScoringFnParams: + oneOf: + - $ref: '#/components/schemas/LLMAsJudgeScoringFnParams' + - $ref: '#/components/schemas/RegexParserScoringFnParams' + - $ref: '#/components/schemas/BasicScoringFnParams' + discriminator: + propertyName: type + mapping: + llm_as_judge: '#/components/schemas/LLMAsJudgeScoringFnParams' + regex_parser: '#/components/schemas/RegexParserScoringFnParams' + basic: '#/components/schemas/BasicScoringFnParams' + ScoringFnParamsType: + type: string + enum: + - llm_as_judge + - regex_parser + - basic + title: ScoringFnParamsType + description: >- + Types of scoring function parameter configurations. + StringType: + type: object + properties: + type: + type: string + const: string + default: string + description: Discriminator type. Always "string" + additionalProperties: false + required: + - type + title: StringType + description: Parameter type for string values. + UnionType: + type: object + properties: + type: + type: string + const: union + default: union + description: Discriminator type. Always "union" + additionalProperties: false + required: + - type + title: UnionType + description: Parameter type for union values. 
+ ListScoringFunctionsResponse: + type: object + properties: + data: + type: array + items: + $ref: '#/components/schemas/ScoringFn' + additionalProperties: false + required: + - data + title: ListScoringFunctionsResponse + ParamType: + oneOf: + - $ref: '#/components/schemas/StringType' + - $ref: '#/components/schemas/NumberType' + - $ref: '#/components/schemas/BooleanType' + - $ref: '#/components/schemas/ArrayType' + - $ref: '#/components/schemas/ObjectType' + - $ref: '#/components/schemas/JsonType' + - $ref: '#/components/schemas/UnionType' + - $ref: '#/components/schemas/ChatCompletionInputType' + - $ref: '#/components/schemas/CompletionInputType' + - $ref: '#/components/schemas/AgentTurnInputType' + discriminator: + propertyName: type + mapping: + string: '#/components/schemas/StringType' + number: '#/components/schemas/NumberType' + boolean: '#/components/schemas/BooleanType' + array: '#/components/schemas/ArrayType' + object: '#/components/schemas/ObjectType' + json: '#/components/schemas/JsonType' + union: '#/components/schemas/UnionType' + chat_completion_input: '#/components/schemas/ChatCompletionInputType' + completion_input: '#/components/schemas/CompletionInputType' + agent_turn_input: '#/components/schemas/AgentTurnInputType' + RegisterScoringFunctionRequest: + type: object + properties: + scoring_fn_id: + type: string + description: >- + The ID of the scoring function to register. + description: + type: string + description: The description of the scoring function. + return_type: + $ref: '#/components/schemas/ParamType' + description: The return type of the scoring function. + provider_scoring_fn_id: + type: string + description: >- + The ID of the provider scoring function to use for the scoring function. + provider_id: + type: string + description: >- + The ID of the provider to use for the scoring function. + params: + $ref: '#/components/schemas/ScoringFnParams' + description: >- + The parameters for the scoring function for benchmark eval, these can + be overridden for app eval. + additionalProperties: false + required: + - scoring_fn_id + - description + - return_type + title: RegisterScoringFunctionRequest + ScoreRequest: + type: object + properties: + input_rows: + type: array + items: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: The rows to score. + scoring_functions: + type: object + additionalProperties: + oneOf: + - $ref: '#/components/schemas/ScoringFnParams' + - type: 'null' + description: >- + The scoring functions to use for the scoring. + additionalProperties: false + required: + - input_rows + - scoring_functions + title: ScoreRequest + ScoreResponse: + type: object + properties: + results: + type: object + additionalProperties: + $ref: '#/components/schemas/ScoringResult' + description: >- + A map of scoring function name to ScoringResult. + additionalProperties: false + required: + - results + title: ScoreResponse + description: The response from scoring. + ScoringResult: + type: object + properties: + score_rows: + type: array + items: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + The scoring result for each row. Each row is a map of column name to value. 
+ aggregated_results: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: Map of metric name to aggregated value + additionalProperties: false + required: + - score_rows + - aggregated_results + title: ScoringResult + description: A scoring result for a single row. + ScoreBatchRequest: + type: object + properties: + dataset_id: + type: string + description: The ID of the dataset to score. + scoring_functions: + type: object + additionalProperties: + oneOf: + - $ref: '#/components/schemas/ScoringFnParams' + - type: 'null' + description: >- + The scoring functions to use for the scoring. + save_results_dataset: + type: boolean + description: >- + Whether to save the results to a dataset. + additionalProperties: false + required: + - dataset_id + - scoring_functions + - save_results_dataset + title: ScoreBatchRequest + ScoreBatchResponse: + type: object + properties: + dataset_id: + type: string + description: >- + (Optional) The identifier of the dataset that was scored + results: + type: object + additionalProperties: + $ref: '#/components/schemas/ScoringResult' + description: >- + A map of scoring function name to ScoringResult + additionalProperties: false + required: + - results + title: ScoreBatchResponse + description: >- + Response from batch scoring operations on datasets. + Shield: + type: object + properties: + identifier: + type: string + provider_resource_id: + type: string + provider_id: + type: string + type: + type: string + enum: + - model + - shield + - vector_store + - dataset + - scoring_function + - benchmark + - tool + - tool_group + - prompt + const: shield + default: shield + description: The resource type, always shield + params: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + (Optional) Configuration parameters for the shield + additionalProperties: false + required: + - identifier + - provider_id + - type + title: Shield + description: >- + A safety shield resource that can be used to check content. + ListShieldsResponse: + type: object + properties: + data: + type: array + items: + $ref: '#/components/schemas/Shield' + additionalProperties: false + required: + - data + title: ListShieldsResponse + RegisterShieldRequest: + type: object + properties: + shield_id: + type: string + description: >- + The identifier of the shield to register. + provider_shield_id: + type: string + description: >- + The identifier of the shield in the provider. + provider_id: + type: string + description: The identifier of the provider. + params: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: The parameters of the shield. + additionalProperties: false + required: + - shield_id + title: RegisterShieldRequest + CompletionMessage: + type: object + properties: + role: + type: string + const: assistant + default: assistant + description: >- + Must be "assistant" to identify this as the model's response + content: + $ref: '#/components/schemas/InterleavedContent' + description: The content of the model's response + stop_reason: + type: string + enum: + - end_of_turn + - end_of_message + - out_of_tokens + description: >- + Reason why the model stopped generating. 
Options are: - `StopReason.end_of_turn`:
+            The model finished generating the entire response. - `StopReason.end_of_message`:
+            The model finished generating but generated a partial response -- usually,
+            a tool call. The user may call the tool and continue the conversation
+            with the tool's response. - `StopReason.out_of_tokens`: The model ran
+            out of token budget.
+        tool_calls:
+          type: array
+          items:
+            $ref: '#/components/schemas/ToolCall'
+          description: >-
+            List of tool calls. Each tool call is a ToolCall object.
+      additionalProperties: false
+      required:
+        - role
+        - content
+        - stop_reason
+      title: CompletionMessage
+      description: >-
+        A message containing the model's (assistant) response in a chat conversation.
+    ImageContentItem:
+      type: object
+      properties:
+        type:
+          type: string
+          const: image
+          default: image
+          description: >-
+            Discriminator type of the content item. Always "image"
+        image:
+          type: object
+          properties:
+            url:
+              $ref: '#/components/schemas/URL'
+              description: >-
+                A URL of the image or data URL in the format of data:image/{type};base64,{data}.
+                Note that the URL could have length limits.
+            data:
+              type: string
+              contentEncoding: base64
+              description: base64 encoded image data as string
+          additionalProperties: false
+          description: >-
+            Image as a base64 encoded string or a URL
+      additionalProperties: false
+      required:
+        - type
+        - image
+      title: ImageContentItem
+      description: An image content item
+    InterleavedContent:
+      oneOf:
+        - type: string
+        - $ref: '#/components/schemas/InterleavedContentItem'
+        - type: array
+          items:
+            $ref: '#/components/schemas/InterleavedContentItem'
+    InterleavedContentItem:
+      oneOf:
+        - $ref: '#/components/schemas/ImageContentItem'
+        - $ref: '#/components/schemas/TextContentItem'
+      discriminator:
+        propertyName: type
+        mapping:
+          image: '#/components/schemas/ImageContentItem'
+          text: '#/components/schemas/TextContentItem'
+    Message:
+      oneOf:
+        - $ref: '#/components/schemas/UserMessage'
+        - $ref: '#/components/schemas/SystemMessage'
+        - $ref: '#/components/schemas/ToolResponseMessage'
+        - $ref: '#/components/schemas/CompletionMessage'
+      discriminator:
+        propertyName: role
+        mapping:
+          user: '#/components/schemas/UserMessage'
+          system: '#/components/schemas/SystemMessage'
+          tool: '#/components/schemas/ToolResponseMessage'
+          assistant: '#/components/schemas/CompletionMessage'
+    SystemMessage:
+      type: object
+      properties:
+        role:
+          type: string
+          const: system
+          default: system
+          description: >-
+            Must be "system" to identify this as a system message
+        content:
+          $ref: '#/components/schemas/InterleavedContent'
+          description: >-
+            The content of the "system prompt". If multiple system messages are provided,
+            they are concatenated. The underlying Llama Stack code may also add other
+            system messages (for example, for formatting tool definitions).
+      additionalProperties: false
+      required:
+        - role
+        - content
+      title: SystemMessage
+      description: >-
+        A system message providing instructions or context to the model.
+    TextContentItem:
+      type: object
+      properties:
+        type:
+          type: string
+          const: text
+          default: text
+          description: >-
+            Discriminator type of the content item.
Always "text" + text: + type: string + description: Text content + additionalProperties: false + required: + - type + - text + title: TextContentItem + description: A text content item + ToolCall: + type: object + properties: + call_id: + type: string + tool_name: + oneOf: + - type: string + enum: + - brave_search + - wolfram_alpha + - photogen + - code_interpreter + title: BuiltinTool + - type: string + arguments: + type: string + additionalProperties: false + required: + - call_id + - tool_name + - arguments + title: ToolCall + ToolResponseMessage: + type: object + properties: + role: + type: string + const: tool + default: tool + description: >- + Must be "tool" to identify this as a tool response + call_id: + type: string + description: >- + Unique identifier for the tool call this response is for + content: + $ref: '#/components/schemas/InterleavedContent' + description: The response content from the tool + additionalProperties: false + required: + - role + - call_id + - content + title: ToolResponseMessage + description: >- + A message representing the result of a tool invocation. + URL: + type: object + properties: + uri: + type: string + description: The URL string pointing to the resource + additionalProperties: false + required: + - uri + title: URL + description: A URL reference to external content. + UserMessage: + type: object + properties: + role: + type: string + const: user + default: user + description: >- + Must be "user" to identify this as a user message + content: + $ref: '#/components/schemas/InterleavedContent' + description: >- + The content of the message, which can include text and other media + context: + $ref: '#/components/schemas/InterleavedContent' + description: >- + (Optional) This field is used internally by Llama Stack to pass RAG context. + This field may be removed in the API in the future. + additionalProperties: false + required: + - role + - content + title: UserMessage + description: >- + A message from the user in a chat conversation. + SyntheticDataGenerateRequest: + type: object + properties: + dialogs: + type: array + items: + $ref: '#/components/schemas/Message' + description: >- + List of conversation messages to use as input for synthetic data generation + filtering_function: + type: string + enum: + - none + - random + - top_k + - top_p + - top_k_top_p + - sigmoid + description: >- + Type of filtering to apply to generated synthetic data samples + model: + type: string + description: >- + (Optional) The identifier of the model to use. The model must be registered + with Llama Stack and available via the /models endpoint + additionalProperties: false + required: + - dialogs + - filtering_function + title: SyntheticDataGenerateRequest + SyntheticDataGenerationResponse: + type: object + properties: + synthetic_data: + type: array + items: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + List of generated synthetic data samples that passed the filtering criteria + statistics: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + (Optional) Statistical information about the generation process and filtering + results + additionalProperties: false + required: + - synthetic_data + title: SyntheticDataGenerationResponse + description: >- + Response from the synthetic data generation. 
Batch of (prompt, response, score) + tuples that pass the threshold. + InvokeToolRequest: + type: object + properties: + tool_name: + type: string + description: The name of the tool to invoke. + kwargs: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + A dictionary of arguments to pass to the tool. + additionalProperties: false + required: + - tool_name + - kwargs + title: InvokeToolRequest + ToolInvocationResult: + type: object + properties: + content: + $ref: '#/components/schemas/InterleavedContent' + description: >- + (Optional) The output content from the tool execution + error_message: + type: string + description: >- + (Optional) Error message if the tool execution failed + error_code: + type: integer + description: >- + (Optional) Numeric error code if the tool execution failed + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + (Optional) Additional metadata about the tool execution + additionalProperties: false + title: ToolInvocationResult + description: Result of a tool invocation. + ToolDef: + type: object + properties: + toolgroup_id: + type: string + description: >- + (Optional) ID of the tool group this tool belongs to + name: + type: string + description: Name of the tool + description: + type: string + description: >- + (Optional) Human-readable description of what the tool does + input_schema: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + (Optional) JSON Schema for tool inputs (MCP inputSchema) + output_schema: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + (Optional) JSON Schema for tool outputs (MCP outputSchema) + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + (Optional) Additional metadata about the tool + additionalProperties: false + required: + - name + title: ToolDef + description: >- + Tool definition used in runtime contexts. + ListToolDefsResponse: + type: object + properties: + data: + type: array + items: + $ref: '#/components/schemas/ToolDef' + description: List of tool definitions + additionalProperties: false + required: + - data + title: ListToolDefsResponse + description: >- + Response containing a list of tool definitions. 
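`InvokeToolRequest` and `ToolInvocationResult` together define the runtime contract: free-form `kwargs` in, and a result where `content`, `error_message`, and `error_code` are all optional, so callers should treat the error fields as the success signal. A sketch of handling a parsed result, with all values illustrative:

```python
request = {
    "tool_name": "web_search",            # illustrative tool name
    "kwargs": {"query": "llama stack"},   # arbitrary JSON arguments
}

def unwrap(result: dict):
    # ToolInvocationResult: every field is optional, so the absence of
    # error_code/error_message is what indicates success.
    if result.get("error_code") is not None or result.get("error_message"):
        raise RuntimeError(
            f"tool failed (code={result.get('error_code')}): "
            f"{result.get('error_message')}"
        )
    return result.get("content"), result.get("metadata", {})

content, metadata = unwrap({"content": "top hits ...", "metadata": {"latency_ms": 120}})
```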
+    ToolGroup:
+      type: object
+      properties:
+        identifier:
+          type: string
+        provider_resource_id:
+          type: string
+        provider_id:
+          type: string
+        type:
+          type: string
+          enum:
+            - model
+            - shield
+            - vector_store
+            - dataset
+            - scoring_function
+            - benchmark
+            - tool
+            - tool_group
+            - prompt
+          const: tool_group
+          default: tool_group
+          description: Type of resource, always 'tool_group'
+        mcp_endpoint:
+          $ref: '#/components/schemas/URL'
+          description: >-
+            (Optional) Model Context Protocol endpoint for remote tools
+        args:
+          type: object
+          additionalProperties:
+            oneOf:
+              - type: 'null'
+              - type: boolean
+              - type: number
+              - type: string
+              - type: array
+              - type: object
+          description: >-
+            (Optional) Additional arguments for the tool group
+      additionalProperties: false
+      required:
+        - identifier
+        - provider_id
+        - type
+      title: ToolGroup
+      description: >-
+        A group of related tools managed together.
+    ListToolGroupsResponse:
+      type: object
+      properties:
+        data:
+          type: array
+          items:
+            $ref: '#/components/schemas/ToolGroup'
+          description: List of tool groups
+      additionalProperties: false
+      required:
+        - data
+      title: ListToolGroupsResponse
+      description: >-
+        Response containing a list of tool groups.
+    RegisterToolGroupRequest:
+      type: object
+      properties:
+        toolgroup_id:
+          type: string
+          description: The ID of the tool group to register.
+        provider_id:
+          type: string
+          description: >-
+            The ID of the provider to use for the tool group.
+        mcp_endpoint:
+          $ref: '#/components/schemas/URL'
+          description: >-
+            The MCP endpoint to use for the tool group.
+        args:
+          type: object
+          additionalProperties:
+            oneOf:
+              - type: 'null'
+              - type: boolean
+              - type: number
+              - type: string
+              - type: array
+              - type: object
+          description: >-
+            A dictionary of arguments to pass to the tool group.
+      additionalProperties: false
+      required:
+        - toolgroup_id
+        - provider_id
+      title: RegisterToolGroupRequest
+    Chunk:
+      type: object
+      properties:
+        content:
+          $ref: '#/components/schemas/InterleavedContent'
+          description: >-
+            The content of the chunk, which can be interleaved text, images, or other
+            types.
+        metadata:
+          type: object
+          additionalProperties:
+            oneOf:
+              - type: 'null'
+              - type: boolean
+              - type: number
+              - type: string
+              - type: array
+              - type: object
+          description: >-
+            Metadata associated with the chunk that will be used in the model context
+            during inference.
+        embedding:
+          type: array
+          items:
+            type: number
+          description: >-
+            Optional embedding for the chunk. If not provided, it will be computed
+            later.
+        stored_chunk_id:
+          type: string
+          description: >-
+            The chunk ID that is stored in the vector database. Used for backend functionality.
+        chunk_metadata:
+          $ref: '#/components/schemas/ChunkMetadata'
+          description: >-
+            Metadata for the chunk that will NOT be used in the context during inference.
+            The `chunk_metadata` is required for backend functionality.
+      additionalProperties: false
+      required:
+        - content
+        - metadata
+      title: Chunk
+      description: >-
+        A chunk of content that can be inserted into a vector database.
+    ChunkMetadata:
+      type: object
+      properties:
+        chunk_id:
+          type: string
+          description: >-
+            The ID of the chunk. If not set, it will be generated based on the document
+            ID and content.
+        document_id:
+          type: string
+          description: >-
+            The ID of the document this chunk belongs to.
+        source:
+          type: string
+          description: >-
+            The source of the content, such as a URL, file path, or other identifier.
+        created_timestamp:
+          type: integer
+          description: >-
+            An optional timestamp indicating when the chunk was created.
+        updated_timestamp:
+          type: integer
+          description: >-
+            An optional timestamp indicating when the chunk was last updated.
+        chunk_window:
+          type: string
+          description: >-
+            The window of the chunk, which can be used to group related chunks together.
+        chunk_tokenizer:
+          type: string
+          description: >-
+            The tokenizer used to create the chunk. Default is Tiktoken.
+        chunk_embedding_model:
+          type: string
+          description: >-
+            The embedding model used to create the chunk's embedding.
+        chunk_embedding_dimension:
+          type: integer
+          description: >-
+            The dimension of the embedding vector for the chunk.
+        content_token_count:
+          type: integer
+          description: >-
+            The number of tokens in the content of the chunk.
+        metadata_token_count:
+          type: integer
+          description: >-
+            The number of tokens in the metadata of the chunk.
+      additionalProperties: false
+      title: ChunkMetadata
+      description: >-
+        `ChunkMetadata` is backend metadata for a `Chunk` that is used to store additional
+        information about the chunk that will not be used in the context during
+        inference, but is required for backend functionality. The `ChunkMetadata` is
+        set during chunk creation in `MemoryToolRuntimeImpl().insert()` and is not
+        expected to change after. Use `Chunk.metadata` for metadata that will
+        be used in the context during inference.
+    InsertChunksRequest:
+      type: object
+      properties:
+        vector_db_id:
+          type: string
+          description: >-
+            The identifier of the vector database to insert the chunks into.
+        chunks:
+          type: array
+          items:
+            $ref: '#/components/schemas/Chunk'
+          description: >-
+            The chunks to insert. Each `Chunk` should contain content which can be
+            interleaved text, images, or other types. `metadata`: `dict[str, Any]`
+            and `embedding`: `List[float]` are optional. If `metadata` is provided,
+            it controls how Llama Stack formats the chunk during generation. If
+            `embedding` is not provided, it will be computed later.
+        ttl_seconds:
+          type: integer
+          description: The time to live of the chunks.
+      additionalProperties: false
+      required:
+        - vector_db_id
+        - chunks
+      title: InsertChunksRequest
+    QueryChunksRequest:
+      type: object
+      properties:
+        vector_db_id:
+          type: string
+          description: >-
+            The identifier of the vector database to query.
+        query:
+          $ref: '#/components/schemas/InterleavedContent'
+          description: The query to search for.
+        params:
+          type: object
+          additionalProperties:
+            oneOf:
+              - type: 'null'
+              - type: boolean
+              - type: number
+              - type: string
+              - type: array
+              - type: object
+          description: The parameters of the query.
+      additionalProperties: false
+      required:
+        - vector_db_id
+        - query
+      title: QueryChunksRequest
+    QueryChunksResponse:
+      type: object
+      properties:
+        chunks:
+          type: array
+          items:
+            $ref: '#/components/schemas/Chunk'
+          description: >-
+            List of content chunks returned from the query
+        scores:
+          type: array
+          items:
+            type: number
+          description: >-
+            Relevance scores corresponding to each returned chunk
+      additionalProperties: false
+      required:
+        - chunks
+        - scores
+      title: QueryChunksResponse
+      description: >-
+        Response from querying chunks in a vector database.
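The insert/query pair above is the low-level vector-IO surface: only `content` and `metadata` are required on a `Chunk`, and `embedding` may be omitted and computed server-side. Minimal request bodies that conform to these schemas, with all identifiers illustrative:

```python
insert_chunks_request = {
    "vector_db_id": "my-db",  # illustrative id
    "chunks": [
        {
            "content": "Llama Stack is a server for AI apps.",
            "metadata": {"document_id": "doc-1"},
            # "embedding" omitted: the schema says it will be computed later.
        }
    ],
    "ttl_seconds": 3600,
}

query_chunks_request = {
    "vector_db_id": "my-db",
    "query": "What is Llama Stack?",
    # Free-form params; the key below is illustrative, not from the schema.
    "params": {"max_chunks": 5},
}
```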
+ VectorStoreFileCounts: + type: object + properties: + completed: + type: integer + description: >- + Number of files that have been successfully processed + cancelled: + type: integer + description: >- + Number of files that had their processing cancelled + failed: + type: integer + description: Number of files that failed to process + in_progress: + type: integer + description: >- + Number of files currently being processed + total: + type: integer + description: >- + Total number of files in the vector store + additionalProperties: false + required: + - completed + - cancelled + - failed + - in_progress + - total + title: VectorStoreFileCounts + description: >- + File processing status counts for a vector store. + VectorStoreListResponse: + type: object + properties: + object: + type: string + default: list + description: Object type identifier, always "list" + data: + type: array + items: + $ref: '#/components/schemas/VectorStoreObject' + description: List of vector store objects + first_id: + type: string + description: >- + (Optional) ID of the first vector store in the list for pagination + last_id: + type: string + description: >- + (Optional) ID of the last vector store in the list for pagination + has_more: + type: boolean + default: false + description: >- + Whether there are more vector stores available beyond this page + additionalProperties: false + required: + - object + - data + - has_more + title: VectorStoreListResponse + description: Response from listing vector stores. + VectorStoreObject: + type: object + properties: + id: + type: string + description: Unique identifier for the vector store + object: + type: string + default: vector_store + description: >- + Object type identifier, always "vector_store" + created_at: + type: integer + description: >- + Timestamp when the vector store was created + name: + type: string + description: (Optional) Name of the vector store + usage_bytes: + type: integer + default: 0 + description: >- + Storage space used by the vector store in bytes + file_counts: + $ref: '#/components/schemas/VectorStoreFileCounts' + description: >- + File processing status counts for the vector store + status: + type: string + default: completed + description: Current status of the vector store + expires_after: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + (Optional) Expiration policy for the vector store + expires_at: + type: integer + description: >- + (Optional) Timestamp when the vector store will expire + last_active_at: + type: integer + description: >- + (Optional) Timestamp of last activity on the vector store + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + Set of key-value pairs that can be attached to the vector store + additionalProperties: false + required: + - id + - object + - created_at + - usage_bytes + - file_counts + - status + - metadata + title: VectorStoreObject + description: OpenAI Vector Store object. 
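`VectorStoreListResponse` follows OpenAI-style cursor pagination: `has_more` plus `first_id`/`last_id` bracket each page, and the caller feeds `last_id` back as the next cursor. A sketch of the client-side loop, assuming a `fetch_page(after)` helper that performs the actual list request (the `after` query parameter is an assumption based on the OpenAI convention):

```python
def list_all_vector_stores(fetch_page):
    """Drain a cursor-paginated listing shaped like VectorStoreListResponse."""
    stores, after = [], None
    while True:
        page = fetch_page(after)  # returns a parsed response dict
        stores.extend(page["data"])
        if not page["has_more"]:
            return stores
        after = page["last_id"]   # cursor for the next page

# Usage with a canned two-page response:
pages = iter([
    {"object": "list", "data": [{"id": "vs_1"}], "first_id": "vs_1",
     "last_id": "vs_1", "has_more": True},
    {"object": "list", "data": [{"id": "vs_2"}], "first_id": "vs_2",
     "last_id": "vs_2", "has_more": False},
])
print([s["id"] for s in list_all_vector_stores(lambda after: next(pages))])
```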
+ "OpenAICreateVectorStoreRequestWithExtraBody": + type: object + properties: + name: + type: string + description: (Optional) A name for the vector store + file_ids: + type: array + items: + type: string + description: >- + List of file IDs to include in the vector store + expires_after: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + (Optional) Expiration policy for the vector store + chunking_strategy: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + (Optional) Strategy for splitting files into chunks + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + Set of key-value pairs that can be attached to the vector store + additionalProperties: false + title: >- + OpenAICreateVectorStoreRequestWithExtraBody + description: >- + Request to create a vector store with extra_body support. + OpenaiUpdateVectorStoreRequest: + type: object + properties: + name: + type: string + description: The name of the vector store. + expires_after: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + The expiration policy for a vector store. + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + Set of 16 key-value pairs that can be attached to an object. + additionalProperties: false + title: OpenaiUpdateVectorStoreRequest + VectorStoreDeleteResponse: + type: object + properties: + id: + type: string + description: >- + Unique identifier of the deleted vector store + object: + type: string + default: vector_store.deleted + description: >- + Object type identifier for the deletion response + deleted: + type: boolean + default: true + description: >- + Whether the deletion operation was successful + additionalProperties: false + required: + - id + - object + - deleted + title: VectorStoreDeleteResponse + description: Response from deleting a vector store. + VectorStoreChunkingStrategy: + oneOf: + - $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto' + - $ref: '#/components/schemas/VectorStoreChunkingStrategyStatic' + discriminator: + propertyName: type + mapping: + auto: '#/components/schemas/VectorStoreChunkingStrategyAuto' + static: '#/components/schemas/VectorStoreChunkingStrategyStatic' + VectorStoreChunkingStrategyAuto: + type: object + properties: + type: + type: string + const: auto + default: auto + description: >- + Strategy type, always "auto" for automatic chunking + additionalProperties: false + required: + - type + title: VectorStoreChunkingStrategyAuto + description: >- + Automatic chunking strategy for vector store files. 
+ VectorStoreChunkingStrategyStatic: + type: object + properties: + type: + type: string + const: static + default: static + description: >- + Strategy type, always "static" for static chunking + static: + $ref: '#/components/schemas/VectorStoreChunkingStrategyStaticConfig' + description: >- + Configuration parameters for the static chunking strategy + additionalProperties: false + required: + - type + - static + title: VectorStoreChunkingStrategyStatic + description: >- + Static chunking strategy with configurable parameters. + VectorStoreChunkingStrategyStaticConfig: + type: object + properties: + chunk_overlap_tokens: + type: integer + default: 400 + description: >- + Number of tokens to overlap between adjacent chunks + max_chunk_size_tokens: + type: integer + default: 800 + description: >- + Maximum number of tokens per chunk, must be between 100 and 4096 + additionalProperties: false + required: + - chunk_overlap_tokens + - max_chunk_size_tokens + title: VectorStoreChunkingStrategyStaticConfig + description: >- + Configuration for static chunking strategy. + "OpenAICreateVectorStoreFileBatchRequestWithExtraBody": + type: object + properties: + file_ids: + type: array + items: + type: string + description: >- + A list of File IDs that the vector store should use + attributes: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + (Optional) Key-value attributes to store with the files + chunking_strategy: + $ref: '#/components/schemas/VectorStoreChunkingStrategy' + description: >- + (Optional) The chunking strategy used to chunk the file(s). Defaults to + auto + additionalProperties: false + required: + - file_ids + title: >- + OpenAICreateVectorStoreFileBatchRequestWithExtraBody + description: >- + Request to create a vector store file batch with extra_body support. + VectorStoreFileBatchObject: + type: object + properties: + id: + type: string + description: Unique identifier for the file batch + object: + type: string + default: vector_store.file_batch + description: >- + Object type identifier, always "vector_store.file_batch" + created_at: + type: integer + description: >- + Timestamp when the file batch was created + vector_store_id: + type: string + description: >- + ID of the vector store containing the file batch + status: + $ref: '#/components/schemas/VectorStoreFileStatus' + description: >- + Current processing status of the file batch + file_counts: + $ref: '#/components/schemas/VectorStoreFileCounts' + description: >- + File processing status counts for the batch + additionalProperties: false + required: + - id + - object + - created_at + - vector_store_id + - status + - file_counts + title: VectorStoreFileBatchObject + description: OpenAI Vector Store File Batch object. + VectorStoreFileStatus: + oneOf: + - type: string + const: completed + - type: string + const: in_progress + - type: string + const: cancelled + - type: string + const: failed + VectorStoreFileLastError: + type: object + properties: + code: + oneOf: + - type: string + const: server_error + - type: string + const: rate_limit_exceeded + description: >- + Error code indicating the type of failure + message: + type: string + description: >- + Human-readable error message describing the failure + additionalProperties: false + required: + - code + - message + title: VectorStoreFileLastError + description: >- + Error information for failed vector store file processing. 
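The static strategy's defaults encode the constraint called out above: 800-token chunks with a 400-token overlap, where `max_chunk_size_tokens` must stay between 100 and 4096. A file-batch creation body using it, with file ids illustrative:

```python
create_file_batch_request = {
    "file_ids": ["file-abc123", "file-def456"],  # illustrative
    "chunking_strategy": {
        "type": "static",
        "static": {
            # Both fields are required by VectorStoreChunkingStrategyStaticConfig.
            "max_chunk_size_tokens": 800,  # must be between 100 and 4096
            "chunk_overlap_tokens": 400,   # overlap between adjacent chunks
        },
    },
}
```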
+ VectorStoreFileObject: + type: object + properties: + id: + type: string + description: Unique identifier for the file + object: + type: string + default: vector_store.file + description: >- + Object type identifier, always "vector_store.file" + attributes: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + Key-value attributes associated with the file + chunking_strategy: + oneOf: + - $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto' + - $ref: '#/components/schemas/VectorStoreChunkingStrategyStatic' + discriminator: + propertyName: type + mapping: + auto: '#/components/schemas/VectorStoreChunkingStrategyAuto' + static: '#/components/schemas/VectorStoreChunkingStrategyStatic' + description: >- + Strategy used for splitting the file into chunks + created_at: + type: integer + description: >- + Timestamp when the file was added to the vector store + last_error: + $ref: '#/components/schemas/VectorStoreFileLastError' + description: >- + (Optional) Error information if file processing failed + status: + $ref: '#/components/schemas/VectorStoreFileStatus' + description: Current processing status of the file + usage_bytes: + type: integer + default: 0 + description: Storage space used by this file in bytes + vector_store_id: + type: string + description: >- + ID of the vector store containing this file + additionalProperties: false + required: + - id + - object + - attributes + - chunking_strategy + - created_at + - status + - usage_bytes + - vector_store_id + title: VectorStoreFileObject + description: OpenAI Vector Store File object. + VectorStoreFilesListInBatchResponse: + type: object + properties: + object: + type: string + default: list + description: Object type identifier, always "list" + data: + type: array + items: + $ref: '#/components/schemas/VectorStoreFileObject' + description: >- + List of vector store file objects in the batch + first_id: + type: string + description: >- + (Optional) ID of the first file in the list for pagination + last_id: + type: string + description: >- + (Optional) ID of the last file in the list for pagination + has_more: + type: boolean + default: false + description: >- + Whether there are more files available beyond this page + additionalProperties: false + required: + - object + - data + - has_more + title: VectorStoreFilesListInBatchResponse + description: >- + Response from listing files in a vector store file batch. + VectorStoreListFilesResponse: + type: object + properties: + object: + type: string + default: list + description: Object type identifier, always "list" + data: + type: array + items: + $ref: '#/components/schemas/VectorStoreFileObject' + description: List of vector store file objects + first_id: + type: string + description: >- + (Optional) ID of the first file in the list for pagination + last_id: + type: string + description: >- + (Optional) ID of the last file in the list for pagination + has_more: + type: boolean + default: false + description: >- + Whether there are more files available beyond this page + additionalProperties: false + required: + - object + - data + - has_more + title: VectorStoreListFilesResponse + description: >- + Response from listing files in a vector store. + OpenaiAttachFileToVectorStoreRequest: + type: object + properties: + file_id: + type: string + description: >- + The ID of the file to attach to the vector store. 
+ attributes: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + The key-value attributes stored with the file, which can be used for filtering. + chunking_strategy: + $ref: '#/components/schemas/VectorStoreChunkingStrategy' + description: >- + The chunking strategy to use for the file. + additionalProperties: false + required: + - file_id + title: OpenaiAttachFileToVectorStoreRequest + OpenaiUpdateVectorStoreFileRequest: + type: object + properties: + attributes: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + The updated key-value attributes to store with the file. + additionalProperties: false + required: + - attributes + title: OpenaiUpdateVectorStoreFileRequest + VectorStoreFileDeleteResponse: + type: object + properties: + id: + type: string + description: Unique identifier of the deleted file + object: + type: string + default: vector_store.file.deleted + description: >- + Object type identifier for the deletion response + deleted: + type: boolean + default: true + description: >- + Whether the deletion operation was successful + additionalProperties: false + required: + - id + - object + - deleted + title: VectorStoreFileDeleteResponse + description: >- + Response from deleting a vector store file. + VectorStoreContent: + type: object + properties: + type: + type: string + const: text + description: >- + Content type, currently only "text" is supported + text: + type: string + description: The actual text content + additionalProperties: false + required: + - type + - text + title: VectorStoreContent + description: >- + Content item from a vector store file or search result. + VectorStoreFileContentsResponse: + type: object + properties: + file_id: + type: string + description: Unique identifier for the file + filename: + type: string + description: Name of the file + attributes: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + Key-value attributes associated with the file + content: + type: array + items: + $ref: '#/components/schemas/VectorStoreContent' + description: List of content items from the file + additionalProperties: false + required: + - file_id + - filename + - attributes + - content + title: VectorStoreFileContentsResponse + description: >- + Response from retrieving the contents of a vector store file. + OpenaiSearchVectorStoreRequest: + type: object + properties: + query: + oneOf: + - type: string + - type: array + items: + type: string + description: >- + The query string or array for performing the search. + filters: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + Filters based on file attributes to narrow the search results. + max_num_results: + type: integer + description: >- + Maximum number of results to return (1 to 50 inclusive, default 10). 
+ ranking_options: + type: object + properties: + ranker: + type: string + description: >- + (Optional) Name of the ranking algorithm to use + score_threshold: + type: number + default: 0.0 + description: >- + (Optional) Minimum relevance score threshold for results + additionalProperties: false + description: >- + Ranking options for fine-tuning the search results. + rewrite_query: + type: boolean + description: >- + Whether to rewrite the natural language query for vector search (default + false) + search_mode: + type: string + description: >- + The search mode to use - "keyword", "vector", or "hybrid" (default "vector") + additionalProperties: false + required: + - query + title: OpenaiSearchVectorStoreRequest + VectorStoreSearchResponse: + type: object + properties: + file_id: + type: string + description: >- + Unique identifier of the file containing the result + filename: + type: string + description: Name of the file containing the result + score: + type: number + description: Relevance score for this search result + attributes: + type: object + additionalProperties: + oneOf: + - type: string + - type: number + - type: boolean + description: >- + (Optional) Key-value attributes associated with the file + content: + type: array + items: + $ref: '#/components/schemas/VectorStoreContent' + description: >- + List of content items matching the search query + additionalProperties: false + required: + - file_id + - filename + - score + - content + title: VectorStoreSearchResponse + description: Response from searching a vector store. + VectorStoreSearchResponsePage: + type: object + properties: + object: + type: string + default: vector_store.search_results.page + description: >- + Object type identifier for the search results page + search_query: + type: string + description: >- + The original search query that was executed + data: + type: array + items: + $ref: '#/components/schemas/VectorStoreSearchResponse' + description: List of search result objects + has_more: + type: boolean + default: false + description: >- + Whether there are more results available beyond this page + next_page: + type: string + description: >- + (Optional) Token for retrieving the next page of results + additionalProperties: false + required: + - object + - search_query + - data + - has_more + title: VectorStoreSearchResponsePage + description: >- + Paginated response from searching a vector store. + VersionInfo: + type: object + properties: + version: + type: string + description: Version number of the service + additionalProperties: false + required: + - version + title: VersionInfo + description: Version information for the service. + AppendRowsRequest: + type: object + properties: + rows: + type: array + items: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: The rows to append to the dataset. 
+ additionalProperties: false + required: + - rows + title: AppendRowsRequest + PaginatedResponse: + type: object + properties: + data: + type: array + items: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: The list of items for the current page + has_more: + type: boolean + description: >- + Whether there are more items available after this set + url: + type: string + description: The URL for accessing this list + additionalProperties: false + required: + - data + - has_more + title: PaginatedResponse + description: >- + A generic paginated response that follows a simple format. + Dataset: + type: object + properties: + identifier: + type: string + provider_resource_id: + type: string + provider_id: + type: string + type: + type: string + enum: + - model + - shield + - vector_store + - dataset + - scoring_function + - benchmark + - tool + - tool_group + - prompt + const: dataset + default: dataset + description: >- + Type of resource, always 'dataset' for datasets + purpose: + type: string + enum: + - post-training/messages + - eval/question-answer + - eval/messages-answer + description: >- + Purpose of the dataset indicating its intended use + source: + oneOf: + - $ref: '#/components/schemas/URIDataSource' + - $ref: '#/components/schemas/RowsDataSource' + discriminator: + propertyName: type + mapping: + uri: '#/components/schemas/URIDataSource' + rows: '#/components/schemas/RowsDataSource' + description: >- + Data source configuration for the dataset + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: Additional metadata for the dataset + additionalProperties: false + required: + - identifier + - provider_id + - type + - purpose + - source + - metadata + title: Dataset + description: >- + Dataset resource for storing and accessing training or evaluation data. + RowsDataSource: + type: object + properties: + type: + type: string + const: rows + default: rows + rows: + type: array + items: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + The dataset is stored in rows. E.g. - [ {"messages": [{"role": "user", + "content": "Hello, world!"}, {"role": "assistant", "content": "Hello, + world!"}]} ] + additionalProperties: false + required: + - type + - rows + title: RowsDataSource + description: A dataset stored in rows. + URIDataSource: + type: object + properties: + type: + type: string + const: uri + default: uri + uri: + type: string + description: >- + The dataset can be obtained from a URI. E.g. - "https://mywebsite.com/mydata.jsonl" + - "lsfs://mydata.jsonl" - "data:csv;base64,{base64_content}" + additionalProperties: false + required: + - type + - uri + title: URIDataSource + description: >- + A dataset that can be obtained from a URI. + ListDatasetsResponse: + type: object + properties: + data: + type: array + items: + $ref: '#/components/schemas/Dataset' + description: List of datasets + additionalProperties: false + required: + - data + title: ListDatasetsResponse + description: Response from listing datasets. 
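+ # Illustrative example instance for ListDatasetsResponse (not part of the
+ # generated spec): the identifier, provider_id, and URI below are made-up
+ # values, shown only to make the response shape concrete.
+ example:
+   data:
+     - identifier: my-eval-dataset
+       provider_id: localfs
+       type: dataset
+       purpose: eval/question-answer
+       source:
+         type: uri
+         uri: https://mywebsite.com/mydata.jsonl
+       metadata: {}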
+ DataSource: + oneOf: + - $ref: '#/components/schemas/URIDataSource' + - $ref: '#/components/schemas/RowsDataSource' + discriminator: + propertyName: type + mapping: + uri: '#/components/schemas/URIDataSource' + rows: '#/components/schemas/RowsDataSource' + RegisterDatasetRequest: + type: object + properties: + purpose: + type: string + enum: + - post-training/messages + - eval/question-answer + - eval/messages-answer + description: >- + The purpose of the dataset. One of: - "post-training/messages": The dataset + contains a messages column with list of messages for post-training. { + "messages": [ {"role": "user", "content": "Hello, world!"}, {"role": "assistant", + "content": "Hello, world!"}, ] } - "eval/question-answer": The dataset + contains a question column and an answer column for evaluation. { "question": + "What is the capital of France?", "answer": "Paris" } - "eval/messages-answer": + The dataset contains a messages column with list of messages and an answer + column for evaluation. { "messages": [ {"role": "user", "content": "Hello, + my name is John Doe."}, {"role": "assistant", "content": "Hello, John + Doe. How can I help you today?"}, {"role": "user", "content": "What's + my name?"}, ], "answer": "John Doe" } + source: + $ref: '#/components/schemas/DataSource' + description: >- + The data source of the dataset. Ensure that the data source schema is + compatible with the purpose of the dataset. Examples: - { "type": "uri", + "uri": "https://mywebsite.com/mydata.jsonl" } - { "type": "uri", "uri": + "lsfs://mydata.jsonl" } - { "type": "uri", "uri": "data:csv;base64,{base64_content}" + } - { "type": "uri", "uri": "huggingface://llamastack/simpleqa?split=train" + } - { "type": "rows", "rows": [ { "messages": [ {"role": "user", "content": + "Hello, world!"}, {"role": "assistant", "content": "Hello, world!"}, ] + } ] } + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + The metadata for the dataset. - E.g. {"description": "My dataset"}. + dataset_id: + type: string + description: >- + The ID of the dataset. If not provided, an ID will be generated. + additionalProperties: false + required: + - purpose + - source + title: RegisterDatasetRequest + AgentConfig: + type: object + properties: + sampling_params: + $ref: '#/components/schemas/SamplingParams' + input_shields: + type: array + items: + type: string + output_shields: + type: array + items: + type: string + toolgroups: + type: array + items: + $ref: '#/components/schemas/AgentTool' + client_tools: + type: array + items: + $ref: '#/components/schemas/ToolDef' + tool_choice: + type: string + enum: + - auto + - required + - none + title: ToolChoice + description: >- + Whether tool use is required or automatic. This is a hint to the model + which may not be followed. It depends on the Instruction Following capabilities + of the model. + deprecated: true + tool_prompt_format: + type: string + enum: + - json + - function_tag + - python_list + title: ToolPromptFormat + description: >- + Prompt format for calling custom / zero shot tools. 
+ deprecated: true + tool_config: + $ref: '#/components/schemas/ToolConfig' + max_infer_iters: + type: integer + default: 10 + model: + type: string + description: >- + The model identifier to use for the agent + instructions: + type: string + description: The system instructions for the agent + name: + type: string + description: >- + Optional name for the agent, used in telemetry and identification + enable_session_persistence: + type: boolean + default: false + description: >- + Optional flag indicating whether session data has to be persisted + response_format: + $ref: '#/components/schemas/ResponseFormat' + description: Optional response format configuration + additionalProperties: false + required: + - model + - instructions + title: AgentConfig + description: Configuration for an agent. + AgentTool: + oneOf: + - type: string + - type: object + properties: + name: + type: string + args: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + additionalProperties: false + required: + - name + - args + title: AgentToolGroupWithArgs + GrammarResponseFormat: + type: object + properties: + type: + type: string + enum: + - json_schema + - grammar + description: >- + Must be "grammar" to identify this format type + const: grammar + default: grammar + bnf: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + The BNF grammar specification the response should conform to + additionalProperties: false + required: + - type + - bnf + title: GrammarResponseFormat + description: >- + Configuration for grammar-guided response generation. + GreedySamplingStrategy: + type: object + properties: + type: + type: string + const: greedy + default: greedy + description: >- + Must be "greedy" to identify this sampling strategy + additionalProperties: false + required: + - type + title: GreedySamplingStrategy + description: >- + Greedy sampling strategy that selects the highest probability token at each + step. + JsonSchemaResponseFormat: + type: object + properties: + type: + type: string + enum: + - json_schema + - grammar + description: >- + Must be "json_schema" to identify this format type + const: json_schema + default: json_schema + json_schema: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + The JSON schema the response should conform to. In a Python SDK, this + is often a `pydantic` model. + additionalProperties: false + required: + - type + - json_schema + title: JsonSchemaResponseFormat + description: >- + Configuration for JSON schema-guided response generation. 
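+ # Illustrative example instance for JsonSchemaResponseFormat (not part of
+ # the generated spec): the schema body below is an assumption, shown only
+ # to make the shape concrete.
+ example:
+   type: json_schema
+   json_schema:
+     type: object
+     properties:
+       answer:
+         type: string
+     required:
+       - answer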
+ ResponseFormat:
+ oneOf:
+ - $ref: '#/components/schemas/JsonSchemaResponseFormat'
+ - $ref: '#/components/schemas/GrammarResponseFormat'
+ discriminator:
+ propertyName: type
+ mapping:
+ json_schema: '#/components/schemas/JsonSchemaResponseFormat'
+ grammar: '#/components/schemas/GrammarResponseFormat'
+ SamplingParams:
+ type: object
+ properties:
+ strategy:
+ oneOf:
+ - $ref: '#/components/schemas/GreedySamplingStrategy'
+ - $ref: '#/components/schemas/TopPSamplingStrategy'
+ - $ref: '#/components/schemas/TopKSamplingStrategy'
+ discriminator:
+ propertyName: type
+ mapping:
+ greedy: '#/components/schemas/GreedySamplingStrategy'
+ top_p: '#/components/schemas/TopPSamplingStrategy'
+ top_k: '#/components/schemas/TopKSamplingStrategy'
+ description: The sampling strategy.
+ max_tokens:
+ type: integer
+ default: 0
+ description: >-
+ The maximum number of tokens that can be generated in the completion.
+ The token count of your prompt plus max_tokens cannot exceed the model's
+ context length.
+ repetition_penalty:
+ type: number
+ default: 1.0
+ description: >-
+ Number between -2.0 and 2.0. Positive values penalize new tokens based
+ on whether they appear in the text so far, increasing the model's likelihood
+ to talk about new topics.
+ stop:
+ type: array
+ items:
+ type: string
+ description: >-
+ Up to 4 sequences where the API will stop generating further tokens. The
+ returned text will not contain the stop sequence.
+ additionalProperties: false
+ required:
+ - strategy
+ title: SamplingParams
+ description: Sampling parameters.
+ ToolConfig:
+ type: object
+ properties:
+ tool_choice:
+ oneOf:
+ - type: string
+ enum:
+ - auto
+ - required
+ - none
+ title: ToolChoice
+ description: >-
+ Whether tool use is required or automatic. This is a hint to the model
+ which may not be followed. It depends on the Instruction Following
+ capabilities of the model.
+ - type: string
+ default: auto
+ description: >-
+ (Optional) Whether tool use is automatic, required, or none. Can also
+ specify a tool name to use a specific tool. Defaults to ToolChoice.auto.
+ tool_prompt_format:
+ type: string
+ enum:
+ - json
+ - function_tag
+ - python_list
+ description: >-
+ (Optional) Instructs the model how to format tool calls. By default, Llama
+ Stack will attempt to use a format that is best adapted to the model.
+ - `ToolPromptFormat.json`: The tool calls are formatted as a JSON object.
+ - `ToolPromptFormat.function_tag`: The tool calls are enclosed in a
+ <function=function_name> tag. - `ToolPromptFormat.python_list`: The tool calls are output as Python
+ syntax -- a list of function calls.
+ system_message_behavior:
+ type: string
+ enum:
+ - append
+ - replace
+ description: >-
+ (Optional) Config for how to override the default system prompt. - `SystemMessageBehavior.append`:
+ Appends the provided system message to the default system prompt. - `SystemMessageBehavior.replace`:
+ Replaces the default system prompt with the provided system message. The
+ system message can include the string '{{function_definitions}}' to indicate
+ where the function definitions should be inserted.
+ default: append
+ additionalProperties: false
+ title: ToolConfig
+ description: Configuration for tool use.
+ TopKSamplingStrategy:
+ type: object
+ properties:
+ type:
+ type: string
+ const: top_k
+ default: top_k
+ description: >-
+ Must be "top_k" to identify this sampling strategy
+ top_k:
+ type: integer
+ description: >-
+ Number of top tokens to consider for sampling.
Must be at least 1 + additionalProperties: false + required: + - type + - top_k + title: TopKSamplingStrategy + description: >- + Top-k sampling strategy that restricts sampling to the k most likely tokens. + TopPSamplingStrategy: + type: object + properties: + type: + type: string + const: top_p + default: top_p + description: >- + Must be "top_p" to identify this sampling strategy + temperature: + type: number + description: >- + Controls randomness in sampling. Higher values increase randomness + top_p: + type: number + default: 0.95 + description: >- + Cumulative probability threshold for nucleus sampling. Defaults to 0.95 + additionalProperties: false + required: + - type + title: TopPSamplingStrategy + description: >- + Top-p (nucleus) sampling strategy that samples from the smallest set of tokens + with cumulative probability >= p. + CreateAgentRequest: + type: object + properties: + agent_config: + $ref: '#/components/schemas/AgentConfig' + description: The configuration for the agent. + additionalProperties: false + required: + - agent_config + title: CreateAgentRequest + AgentCreateResponse: + type: object + properties: + agent_id: + type: string + description: Unique identifier for the created agent + additionalProperties: false + required: + - agent_id + title: AgentCreateResponse + description: >- + Response returned when creating a new agent. + Agent: + type: object + properties: + agent_id: + type: string + description: Unique identifier for the agent + agent_config: + $ref: '#/components/schemas/AgentConfig' + description: Configuration settings for the agent + created_at: + type: string + format: date-time + description: Timestamp when the agent was created + additionalProperties: false + required: + - agent_id + - agent_config + - created_at + title: Agent + description: >- + An agent instance with configuration and metadata. + CreateAgentSessionRequest: + type: object + properties: + session_name: + type: string + description: The name of the session to create. + additionalProperties: false + required: + - session_name + title: CreateAgentSessionRequest + AgentSessionCreateResponse: + type: object + properties: + session_id: + type: string + description: >- + Unique identifier for the created session + additionalProperties: false + required: + - session_id + title: AgentSessionCreateResponse + description: >- + Response returned when creating a new agent session. + InferenceStep: + type: object + properties: + turn_id: + type: string + description: The ID of the turn. + step_id: + type: string + description: The ID of the step. + started_at: + type: string + format: date-time + description: The time the step started. + completed_at: + type: string + format: date-time + description: The time the step completed. + step_type: + type: string + enum: + - inference + - tool_execution + - shield_call + - memory_retrieval + title: StepType + description: Type of the step in an agent turn. + const: inference + default: inference + model_response: + $ref: '#/components/schemas/CompletionMessage' + description: The response from the LLM. + additionalProperties: false + required: + - turn_id + - step_id + - step_type + - model_response + title: InferenceStep + description: An inference step in an agent turn. + MemoryRetrievalStep: + type: object + properties: + turn_id: + type: string + description: The ID of the turn. + step_id: + type: string + description: The ID of the step. + started_at: + type: string + format: date-time + description: The time the step started. 
+ completed_at: + type: string + format: date-time + description: The time the step completed. + step_type: + type: string + enum: + - inference + - tool_execution + - shield_call + - memory_retrieval + title: StepType + description: Type of the step in an agent turn. + const: memory_retrieval + default: memory_retrieval + vector_db_ids: + type: string + description: >- + The IDs of the vector databases to retrieve context from. + inserted_context: + $ref: '#/components/schemas/InterleavedContent' + description: >- + The context retrieved from the vector databases. + additionalProperties: false + required: + - turn_id + - step_id + - step_type + - vector_db_ids + - inserted_context + title: MemoryRetrievalStep + description: >- + A memory retrieval step in an agent turn. + Session: + type: object + properties: + session_id: + type: string + description: >- + Unique identifier for the conversation session + session_name: + type: string + description: Human-readable name for the session + turns: + type: array + items: + $ref: '#/components/schemas/Turn' + description: >- + List of all turns that have occurred in this session + started_at: + type: string + format: date-time + description: Timestamp when the session was created + additionalProperties: false + required: + - session_id + - session_name + - turns + - started_at + title: Session + description: >- + A single session of an interaction with an Agentic System. + ShieldCallStep: + type: object + properties: + turn_id: + type: string + description: The ID of the turn. + step_id: + type: string + description: The ID of the step. + started_at: + type: string + format: date-time + description: The time the step started. + completed_at: + type: string + format: date-time + description: The time the step completed. + step_type: + type: string + enum: + - inference + - tool_execution + - shield_call + - memory_retrieval + title: StepType + description: Type of the step in an agent turn. + const: shield_call + default: shield_call + violation: + $ref: '#/components/schemas/SafetyViolation' + description: The violation from the shield call. + additionalProperties: false + required: + - turn_id + - step_id + - step_type + title: ShieldCallStep + description: A shield call step in an agent turn. + ToolExecutionStep: + type: object + properties: + turn_id: + type: string + description: The ID of the turn. + step_id: + type: string + description: The ID of the step. + started_at: + type: string + format: date-time + description: The time the step started. + completed_at: + type: string + format: date-time + description: The time the step completed. + step_type: + type: string + enum: + - inference + - tool_execution + - shield_call + - memory_retrieval + title: StepType + description: Type of the step in an agent turn. + const: tool_execution + default: tool_execution + tool_calls: + type: array + items: + $ref: '#/components/schemas/ToolCall' + description: The tool calls to execute. + tool_responses: + type: array + items: + $ref: '#/components/schemas/ToolResponse' + description: The tool responses from the tool calls. + additionalProperties: false + required: + - turn_id + - step_id + - step_type + - tool_calls + - tool_responses + title: ToolExecutionStep + description: A tool execution step in an agent turn. 
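+ # Illustrative example instance for ToolExecutionStep (not part of the
+ # generated spec): the ToolCall fields shown (call_id, tool_name, arguments)
+ # are assumptions about the referenced ToolCall schema, and all values are
+ # made up.
+ example:
+   turn_id: turn-1
+   step_id: step-2
+   step_type: tool_execution
+   tool_calls:
+     - call_id: call-1
+       tool_name: brave_search
+       arguments: '{"query": "llama stack"}'
+   tool_responses:
+     - call_id: call-1
+       tool_name: brave_search
+       content: "Top search result ..."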
+ ToolResponse: + type: object + properties: + call_id: + type: string + description: >- + Unique identifier for the tool call this response is for + tool_name: + oneOf: + - type: string + enum: + - brave_search + - wolfram_alpha + - photogen + - code_interpreter + title: BuiltinTool + - type: string + description: Name of the tool that was invoked + content: + $ref: '#/components/schemas/InterleavedContent' + description: The response content from the tool + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + (Optional) Additional metadata about the tool response + additionalProperties: false + required: + - call_id + - tool_name + - content + title: ToolResponse + description: Response from a tool invocation. + Turn: + type: object + properties: + turn_id: + type: string + description: >- + Unique identifier for the turn within a session + session_id: + type: string + description: >- + Unique identifier for the conversation session + input_messages: + type: array + items: + oneOf: + - $ref: '#/components/schemas/UserMessage' + - $ref: '#/components/schemas/ToolResponseMessage' + description: >- + List of messages that initiated this turn + steps: + type: array + items: + oneOf: + - $ref: '#/components/schemas/InferenceStep' + - $ref: '#/components/schemas/ToolExecutionStep' + - $ref: '#/components/schemas/ShieldCallStep' + - $ref: '#/components/schemas/MemoryRetrievalStep' + discriminator: + propertyName: step_type + mapping: + inference: '#/components/schemas/InferenceStep' + tool_execution: '#/components/schemas/ToolExecutionStep' + shield_call: '#/components/schemas/ShieldCallStep' + memory_retrieval: '#/components/schemas/MemoryRetrievalStep' + description: >- + Ordered list of processing steps executed during this turn + output_message: + $ref: '#/components/schemas/CompletionMessage' + description: >- + The model's generated response containing content and metadata + output_attachments: + type: array + items: + type: object + properties: + content: + oneOf: + - type: string + - $ref: '#/components/schemas/InterleavedContentItem' + - type: array + items: + $ref: '#/components/schemas/InterleavedContentItem' + - $ref: '#/components/schemas/URL' + description: The content of the attachment. + mime_type: + type: string + description: The MIME type of the attachment. + additionalProperties: false + required: + - content + - mime_type + title: Attachment + description: An attachment to an agent turn. + description: >- + (Optional) Files or media attached to the agent's response + started_at: + type: string + format: date-time + description: Timestamp when the turn began + completed_at: + type: string + format: date-time + description: >- + (Optional) Timestamp when the turn finished, if completed + additionalProperties: false + required: + - turn_id + - session_id + - input_messages + - steps + - output_message + - started_at + title: Turn + description: >- + A single turn in an interaction with an Agentic System. + CreateAgentTurnRequest: + type: object + properties: + messages: + type: array + items: + oneOf: + - $ref: '#/components/schemas/UserMessage' + - $ref: '#/components/schemas/ToolResponseMessage' + description: List of messages to start the turn with. + stream: + type: boolean + description: >- + (Optional) If True, generate an SSE event stream of the response. Defaults + to False. 
+ documents: + type: array + items: + type: object + properties: + content: + oneOf: + - type: string + - $ref: '#/components/schemas/InterleavedContentItem' + - type: array + items: + $ref: '#/components/schemas/InterleavedContentItem' + - $ref: '#/components/schemas/URL' + description: The content of the document. + mime_type: + type: string + description: The MIME type of the document. + additionalProperties: false + required: + - content + - mime_type + title: Document + description: A document to be used by an agent. + description: >- + (Optional) List of documents to create the turn with. + toolgroups: + type: array + items: + $ref: '#/components/schemas/AgentTool' + description: >- + (Optional) List of toolgroups to create the turn with, will be used in + addition to the agent's config toolgroups for the request. + tool_config: + $ref: '#/components/schemas/ToolConfig' + description: >- + (Optional) The tool configuration to create the turn with, will be used + to override the agent's tool_config. + additionalProperties: false + required: + - messages + title: CreateAgentTurnRequest + AgentTurnResponseEvent: + type: object + properties: + payload: + oneOf: + - $ref: '#/components/schemas/AgentTurnResponseStepStartPayload' + - $ref: '#/components/schemas/AgentTurnResponseStepProgressPayload' + - $ref: '#/components/schemas/AgentTurnResponseStepCompletePayload' + - $ref: '#/components/schemas/AgentTurnResponseTurnStartPayload' + - $ref: '#/components/schemas/AgentTurnResponseTurnCompletePayload' + - $ref: '#/components/schemas/AgentTurnResponseTurnAwaitingInputPayload' + discriminator: + propertyName: event_type + mapping: + step_start: '#/components/schemas/AgentTurnResponseStepStartPayload' + step_progress: '#/components/schemas/AgentTurnResponseStepProgressPayload' + step_complete: '#/components/schemas/AgentTurnResponseStepCompletePayload' + turn_start: '#/components/schemas/AgentTurnResponseTurnStartPayload' + turn_complete: '#/components/schemas/AgentTurnResponseTurnCompletePayload' + turn_awaiting_input: '#/components/schemas/AgentTurnResponseTurnAwaitingInputPayload' + description: >- + Event-specific payload containing event data + additionalProperties: false + required: + - payload + title: AgentTurnResponseEvent + description: >- + An event in an agent turn response stream. 
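+ # Illustrative example instance for AgentTurnResponseEvent (not part of the
+ # generated spec): a step_progress payload carrying a TextDelta, matching
+ # the payload schemas defined below; all concrete values are assumptions.
+ example:
+   payload:
+     event_type: step_progress
+     step_type: inference
+     step_id: step-1
+     delta:
+       type: text
+       text: "Hello"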
+ AgentTurnResponseStepCompletePayload: + type: object + properties: + event_type: + type: string + enum: + - step_start + - step_complete + - step_progress + - turn_start + - turn_complete + - turn_awaiting_input + const: step_complete + default: step_complete + description: Type of event being reported + step_type: + type: string + enum: + - inference + - tool_execution + - shield_call + - memory_retrieval + description: Type of step being executed + step_id: + type: string + description: >- + Unique identifier for the step within a turn + step_details: + oneOf: + - $ref: '#/components/schemas/InferenceStep' + - $ref: '#/components/schemas/ToolExecutionStep' + - $ref: '#/components/schemas/ShieldCallStep' + - $ref: '#/components/schemas/MemoryRetrievalStep' + discriminator: + propertyName: step_type + mapping: + inference: '#/components/schemas/InferenceStep' + tool_execution: '#/components/schemas/ToolExecutionStep' + shield_call: '#/components/schemas/ShieldCallStep' + memory_retrieval: '#/components/schemas/MemoryRetrievalStep' + description: Complete details of the executed step + additionalProperties: false + required: + - event_type + - step_type + - step_id + - step_details + title: AgentTurnResponseStepCompletePayload + description: >- + Payload for step completion events in agent turn responses. + AgentTurnResponseStepProgressPayload: + type: object + properties: + event_type: + type: string + enum: + - step_start + - step_complete + - step_progress + - turn_start + - turn_complete + - turn_awaiting_input + const: step_progress + default: step_progress + description: Type of event being reported + step_type: + type: string + enum: + - inference + - tool_execution + - shield_call + - memory_retrieval + description: Type of step being executed + step_id: + type: string + description: >- + Unique identifier for the step within a turn + delta: + oneOf: + - $ref: '#/components/schemas/TextDelta' + - $ref: '#/components/schemas/ImageDelta' + - $ref: '#/components/schemas/ToolCallDelta' + discriminator: + propertyName: type + mapping: + text: '#/components/schemas/TextDelta' + image: '#/components/schemas/ImageDelta' + tool_call: '#/components/schemas/ToolCallDelta' + description: >- + Incremental content changes during step execution + additionalProperties: false + required: + - event_type + - step_type + - step_id + - delta + title: AgentTurnResponseStepProgressPayload + description: >- + Payload for step progress events in agent turn responses. + AgentTurnResponseStepStartPayload: + type: object + properties: + event_type: + type: string + enum: + - step_start + - step_complete + - step_progress + - turn_start + - turn_complete + - turn_awaiting_input + const: step_start + default: step_start + description: Type of event being reported + step_type: + type: string + enum: + - inference + - tool_execution + - shield_call + - memory_retrieval + description: Type of step being executed + step_id: + type: string + description: >- + Unique identifier for the step within a turn + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + (Optional) Additional metadata for the step + additionalProperties: false + required: + - event_type + - step_type + - step_id + title: AgentTurnResponseStepStartPayload + description: >- + Payload for step start events in agent turn responses. 
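+ # Illustrative example instance for AgentTurnResponseStepStartPayload (not
+ # part of the generated spec); the step identifier is a made-up value.
+ example:
+   event_type: step_start
+   step_type: inference
+   step_id: step-1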
+ AgentTurnResponseStreamChunk: + type: object + properties: + event: + $ref: '#/components/schemas/AgentTurnResponseEvent' + description: >- + Individual event in the agent turn response stream + additionalProperties: false + required: + - event + title: AgentTurnResponseStreamChunk + description: Streamed agent turn completion response. + "AgentTurnResponseTurnAwaitingInputPayload": + type: object + properties: + event_type: + type: string + enum: + - step_start + - step_complete + - step_progress + - turn_start + - turn_complete + - turn_awaiting_input + const: turn_awaiting_input + default: turn_awaiting_input + description: Type of event being reported + turn: + $ref: '#/components/schemas/Turn' + description: >- + Turn data when waiting for external tool responses + additionalProperties: false + required: + - event_type + - turn + title: >- + AgentTurnResponseTurnAwaitingInputPayload + description: >- + Payload for turn awaiting input events in agent turn responses. + AgentTurnResponseTurnCompletePayload: + type: object + properties: + event_type: + type: string + enum: + - step_start + - step_complete + - step_progress + - turn_start + - turn_complete + - turn_awaiting_input + const: turn_complete + default: turn_complete + description: Type of event being reported + turn: + $ref: '#/components/schemas/Turn' + description: >- + Complete turn data including all steps and results + additionalProperties: false + required: + - event_type + - turn + title: AgentTurnResponseTurnCompletePayload + description: >- + Payload for turn completion events in agent turn responses. + AgentTurnResponseTurnStartPayload: + type: object + properties: + event_type: + type: string + enum: + - step_start + - step_complete + - step_progress + - turn_start + - turn_complete + - turn_awaiting_input + const: turn_start + default: turn_start + description: Type of event being reported + turn_id: + type: string + description: >- + Unique identifier for the turn within a session + additionalProperties: false + required: + - event_type + - turn_id + title: AgentTurnResponseTurnStartPayload + description: >- + Payload for turn start events in agent turn responses. + ImageDelta: + type: object + properties: + type: + type: string + const: image + default: image + description: >- + Discriminator type of the delta. Always "image" + image: + type: string + contentEncoding: base64 + description: The incremental image data as bytes + additionalProperties: false + required: + - type + - image + title: ImageDelta + description: >- + An image content delta for streaming responses. + TextDelta: + type: object + properties: + type: + type: string + const: text + default: text + description: >- + Discriminator type of the delta. Always "text" + text: + type: string + description: The incremental text content + additionalProperties: false + required: + - type + - text + title: TextDelta + description: >- + A text content delta for streaming responses. + ToolCallDelta: + type: object + properties: + type: + type: string + const: tool_call + default: tool_call + description: >- + Discriminator type of the delta. 
Always "tool_call" + tool_call: + oneOf: + - type: string + - $ref: '#/components/schemas/ToolCall' + description: >- + Either an in-progress tool call string or the final parsed tool call + parse_status: + type: string + enum: + - started + - in_progress + - failed + - succeeded + description: Current parsing status of the tool call + additionalProperties: false + required: + - type + - tool_call + - parse_status + title: ToolCallDelta + description: >- + A tool call content delta for streaming responses. + ResumeAgentTurnRequest: + type: object + properties: + tool_responses: + type: array + items: + $ref: '#/components/schemas/ToolResponse' + description: >- + The tool call responses to resume the turn with. + stream: + type: boolean + description: Whether to stream the response. + additionalProperties: false + required: + - tool_responses + title: ResumeAgentTurnRequest + AgentStepResponse: + type: object + properties: + step: + oneOf: + - $ref: '#/components/schemas/InferenceStep' + - $ref: '#/components/schemas/ToolExecutionStep' + - $ref: '#/components/schemas/ShieldCallStep' + - $ref: '#/components/schemas/MemoryRetrievalStep' + discriminator: + propertyName: step_type + mapping: + inference: '#/components/schemas/InferenceStep' + tool_execution: '#/components/schemas/ToolExecutionStep' + shield_call: '#/components/schemas/ShieldCallStep' + memory_retrieval: '#/components/schemas/MemoryRetrievalStep' + description: >- + The complete step data and execution details + additionalProperties: false + required: + - step + title: AgentStepResponse + description: >- + Response containing details of a specific agent step. + Benchmark: + type: object + properties: + identifier: + type: string + provider_resource_id: + type: string + provider_id: + type: string + type: + type: string + enum: + - model + - shield + - vector_store + - dataset + - scoring_function + - benchmark + - tool + - tool_group + - prompt + const: benchmark + default: benchmark + description: The resource type, always benchmark + dataset_id: + type: string + description: >- + Identifier of the dataset to use for the benchmark evaluation + scoring_functions: + type: array + items: + type: string + description: >- + List of scoring function identifiers to apply during evaluation + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: Metadata for this evaluation task + additionalProperties: false + required: + - identifier + - provider_id + - type + - dataset_id + - scoring_functions + - metadata + title: Benchmark + description: >- + A benchmark resource for evaluating model performance. + ListBenchmarksResponse: + type: object + properties: + data: + type: array + items: + $ref: '#/components/schemas/Benchmark' + additionalProperties: false + required: + - data + title: ListBenchmarksResponse + RegisterBenchmarkRequest: + type: object + properties: + benchmark_id: + type: string + description: The ID of the benchmark to register. + dataset_id: + type: string + description: >- + The ID of the dataset to use for the benchmark. + scoring_functions: + type: array + items: + type: string + description: >- + The scoring functions to use for the benchmark. + provider_benchmark_id: + type: string + description: >- + The ID of the provider benchmark to use for the benchmark. + provider_id: + type: string + description: >- + The ID of the provider to use for the benchmark. 
+ metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: The metadata to use for the benchmark. + additionalProperties: false + required: + - benchmark_id + - dataset_id + - scoring_functions + title: RegisterBenchmarkRequest + AgentCandidate: + type: object + properties: + type: + type: string + const: agent + default: agent + config: + $ref: '#/components/schemas/AgentConfig' + description: >- + The configuration for the agent candidate. + additionalProperties: false + required: + - type + - config + title: AgentCandidate + description: An agent candidate for evaluation. + BenchmarkConfig: + type: object + properties: + eval_candidate: + oneOf: + - $ref: '#/components/schemas/ModelCandidate' + - $ref: '#/components/schemas/AgentCandidate' + discriminator: + propertyName: type + mapping: + model: '#/components/schemas/ModelCandidate' + agent: '#/components/schemas/AgentCandidate' + description: The candidate to evaluate. + scoring_params: + type: object + additionalProperties: + $ref: '#/components/schemas/ScoringFnParams' + description: >- + Map between scoring function id and parameters for each scoring function + you want to run + num_examples: + type: integer + description: >- + (Optional) The number of examples to evaluate. If not provided, all examples + in the dataset will be evaluated + additionalProperties: false + required: + - eval_candidate + - scoring_params + title: BenchmarkConfig + description: >- + A benchmark configuration for evaluation. + ModelCandidate: + type: object + properties: + type: + type: string + const: model + default: model + model: + type: string + description: The model ID to evaluate. + sampling_params: + $ref: '#/components/schemas/SamplingParams' + description: The sampling parameters for the model. + system_message: + $ref: '#/components/schemas/SystemMessage' + description: >- + (Optional) The system message providing instructions or context to the + model. + additionalProperties: false + required: + - type + - model + - sampling_params + title: ModelCandidate + description: A model candidate for evaluation. + EvaluateRowsRequest: + type: object + properties: + input_rows: + type: array + items: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: The rows to evaluate. + scoring_functions: + type: array + items: + type: string + description: >- + The scoring functions to use for the evaluation. + benchmark_config: + $ref: '#/components/schemas/BenchmarkConfig' + description: The configuration for the benchmark. + additionalProperties: false + required: + - input_rows + - scoring_functions + - benchmark_config + title: EvaluateRowsRequest + EvaluateResponse: + type: object + properties: + generations: + type: array + items: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: The generations from the evaluation. + scores: + type: object + additionalProperties: + $ref: '#/components/schemas/ScoringResult' + description: The scores from the evaluation. + additionalProperties: false + required: + - generations + - scores + title: EvaluateResponse + description: The response from an evaluation. 
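+ # Illustrative example instance for EvaluateResponse (not part of the
+ # generated spec): the ScoringResult fields shown (score_rows,
+ # aggregated_results) are assumptions about the referenced schema, and the
+ # scoring function id is a made-up value.
+ example:
+   generations:
+     - generated_answer: Paris
+   scores:
+     basic::equality:
+       score_rows:
+         - score: 1.0
+       aggregated_results:
+         accuracy: 1.0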
+ RunEvalRequest: + type: object + properties: + benchmark_config: + $ref: '#/components/schemas/BenchmarkConfig' + description: The configuration for the benchmark. + additionalProperties: false + required: + - benchmark_config + title: RunEvalRequest + Job: + type: object + properties: + job_id: + type: string + description: Unique identifier for the job + status: + type: string + enum: + - completed + - in_progress + - failed + - scheduled + - cancelled + description: Current execution status of the job + additionalProperties: false + required: + - job_id + - status + title: Job + description: >- + A job execution instance with status tracking. + RerankRequest: + type: object + properties: + model: + type: string + description: >- + The identifier of the reranking model to use. + query: + oneOf: + - type: string + - $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + - $ref: '#/components/schemas/OpenAIChatCompletionContentPartImageParam' + description: >- + The search query to rank items against. Can be a string, text content + part, or image content part. The input must not exceed the model's max + input token length. + items: + type: array + items: + oneOf: + - type: string + - $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + - $ref: '#/components/schemas/OpenAIChatCompletionContentPartImageParam' + description: >- + List of items to rerank. Each item can be a string, text content part, + or image content part. Each input must not exceed the model's max input + token length. + max_num_results: + type: integer + description: >- + (Optional) Maximum number of results to return. Default: returns all. + additionalProperties: false + required: + - model + - query + - items + title: RerankRequest + RerankData: + type: object + properties: + index: + type: integer + description: >- + The original index of the document in the input list + relevance_score: + type: number + description: >- + The relevance score from the model output. Values are inverted when applicable + so that higher scores indicate greater relevance. + additionalProperties: false + required: + - index + - relevance_score + title: RerankData + description: >- + A single rerank result from a reranking response. + RerankResponse: + type: object + properties: + data: + type: array + items: + $ref: '#/components/schemas/RerankData' + description: >- + List of rerank result objects, sorted by relevance score (descending) + additionalProperties: false + required: + - data + title: RerankResponse + description: Response from a reranking request. + Checkpoint: + type: object + properties: + identifier: + type: string + description: Unique identifier for the checkpoint + created_at: + type: string + format: date-time + description: >- + Timestamp when the checkpoint was created + epoch: + type: integer + description: >- + Training epoch when the checkpoint was saved + post_training_job_id: + type: string + description: >- + Identifier of the training job that created this checkpoint + path: + type: string + description: >- + File system path where the checkpoint is stored + training_metrics: + $ref: '#/components/schemas/PostTrainingMetric' + description: >- + (Optional) Training metrics associated with this checkpoint + additionalProperties: false + required: + - identifier + - created_at + - epoch + - post_training_job_id + - path + title: Checkpoint + description: Checkpoint created during training runs. 
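+ # Illustrative example instance for Checkpoint (not part of the generated
+ # spec); the identifiers and path are made-up values.
+ example:
+   identifier: checkpoint-epoch-1
+   created_at: "2025-01-01T00:00:00Z"
+   epoch: 1
+   post_training_job_id: job-1234
+   path: /checkpoints/job-1234/epoch-1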
+ PostTrainingJobArtifactsResponse: + type: object + properties: + job_uuid: + type: string + description: Unique identifier for the training job + checkpoints: + type: array + items: + $ref: '#/components/schemas/Checkpoint' + description: >- + List of model checkpoints created during training + additionalProperties: false + required: + - job_uuid + - checkpoints + title: PostTrainingJobArtifactsResponse + description: Artifacts of a finetuning job. + PostTrainingMetric: + type: object + properties: + epoch: + type: integer + description: Training epoch number + train_loss: + type: number + description: Loss value on the training dataset + validation_loss: + type: number + description: Loss value on the validation dataset + perplexity: + type: number + description: >- + Perplexity metric indicating model confidence + additionalProperties: false + required: + - epoch + - train_loss + - validation_loss + - perplexity + title: PostTrainingMetric + description: >- + Training metrics captured during post-training jobs. + CancelTrainingJobRequest: + type: object + properties: + job_uuid: + type: string + description: The UUID of the job to cancel. + additionalProperties: false + required: + - job_uuid + title: CancelTrainingJobRequest + PostTrainingJobStatusResponse: + type: object + properties: + job_uuid: + type: string + description: Unique identifier for the training job + status: + type: string + enum: + - completed + - in_progress + - failed + - scheduled + - cancelled + description: Current status of the training job + scheduled_at: + type: string + format: date-time + description: >- + (Optional) Timestamp when the job was scheduled + started_at: + type: string + format: date-time + description: >- + (Optional) Timestamp when the job execution began + completed_at: + type: string + format: date-time + description: >- + (Optional) Timestamp when the job finished, if completed + resources_allocated: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + (Optional) Information about computational resources allocated to the + job + checkpoints: + type: array + items: + $ref: '#/components/schemas/Checkpoint' + description: >- + List of model checkpoints created during training + additionalProperties: false + required: + - job_uuid + - status + - checkpoints + title: PostTrainingJobStatusResponse + description: Status of a finetuning job. + ListPostTrainingJobsResponse: + type: object + properties: + data: + type: array + items: + type: object + properties: + job_uuid: + type: string + additionalProperties: false + required: + - job_uuid + title: PostTrainingJob + additionalProperties: false + required: + - data + title: ListPostTrainingJobsResponse + DPOAlignmentConfig: + type: object + properties: + beta: + type: number + description: Temperature parameter for the DPO loss + loss_type: + $ref: '#/components/schemas/DPOLossType' + default: sigmoid + description: The type of loss function to use for DPO + additionalProperties: false + required: + - beta + - loss_type + title: DPOAlignmentConfig + description: >- + Configuration for Direct Preference Optimization (DPO) alignment. 
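+ # Illustrative example instance for DPOAlignmentConfig (not part of the
+ # generated spec); the beta value is an assumption (0.1 is a common choice
+ # in the DPO literature).
+ example:
+   beta: 0.1
+   loss_type: sigmoid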
+ DPOLossType: + type: string + enum: + - sigmoid + - hinge + - ipo + - kto_pair + title: DPOLossType + DataConfig: + type: object + properties: + dataset_id: + type: string + description: >- + Unique identifier for the training dataset + batch_size: + type: integer + description: Number of samples per training batch + shuffle: + type: boolean + description: >- + Whether to shuffle the dataset during training + data_format: + $ref: '#/components/schemas/DatasetFormat' + description: >- + Format of the dataset (instruct or dialog) + validation_dataset_id: + type: string + description: >- + (Optional) Unique identifier for the validation dataset + packed: + type: boolean + default: false + description: >- + (Optional) Whether to pack multiple samples into a single sequence for + efficiency + train_on_input: + type: boolean + default: false + description: >- + (Optional) Whether to compute loss on input tokens as well as output tokens + additionalProperties: false + required: + - dataset_id + - batch_size + - shuffle + - data_format + title: DataConfig + description: >- + Configuration for training data and data loading. + DatasetFormat: + type: string + enum: + - instruct + - dialog + title: DatasetFormat + description: Format of the training dataset. + EfficiencyConfig: + type: object + properties: + enable_activation_checkpointing: + type: boolean + default: false + description: >- + (Optional) Whether to use activation checkpointing to reduce memory usage + enable_activation_offloading: + type: boolean + default: false + description: >- + (Optional) Whether to offload activations to CPU to save GPU memory + memory_efficient_fsdp_wrap: + type: boolean + default: false + description: >- + (Optional) Whether to use memory-efficient FSDP wrapping + fsdp_cpu_offload: + type: boolean + default: false + description: >- + (Optional) Whether to offload FSDP parameters to CPU + additionalProperties: false + title: EfficiencyConfig + description: >- + Configuration for memory and compute efficiency optimizations. + OptimizerConfig: + type: object + properties: + optimizer_type: + $ref: '#/components/schemas/OptimizerType' + description: >- + Type of optimizer to use (adam, adamw, or sgd) + lr: + type: number + description: Learning rate for the optimizer + weight_decay: + type: number + description: >- + Weight decay coefficient for regularization + num_warmup_steps: + type: integer + description: Number of steps for learning rate warmup + additionalProperties: false + required: + - optimizer_type + - lr + - weight_decay + - num_warmup_steps + title: OptimizerConfig + description: >- + Configuration parameters for the optimization algorithm. + OptimizerType: + type: string + enum: + - adam + - adamw + - sgd + title: OptimizerType + description: >- + Available optimizer algorithms for training. 
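+ # Illustrative example value for OptimizerType (not part of the generated
+ # spec); any of the enum members above would be valid here.
+ example: adamw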
+    TrainingConfig:
+      type: object
+      properties:
+        n_epochs:
+          type: integer
+          description: Number of training epochs to run
+        max_steps_per_epoch:
+          type: integer
+          default: 1
+          description: Maximum number of steps to run per epoch
+        gradient_accumulation_steps:
+          type: integer
+          default: 1
+          description: >-
+            Number of steps to accumulate gradients before updating
+        max_validation_steps:
+          type: integer
+          default: 1
+          description: >-
+            (Optional) Maximum number of validation steps per epoch
+        data_config:
+          $ref: '#/components/schemas/DataConfig'
+          description: >-
+            (Optional) Configuration for data loading and formatting
+        optimizer_config:
+          $ref: '#/components/schemas/OptimizerConfig'
+          description: >-
+            (Optional) Configuration for the optimization algorithm
+        efficiency_config:
+          $ref: '#/components/schemas/EfficiencyConfig'
+          description: >-
+            (Optional) Configuration for memory and compute optimizations
+        dtype:
+          type: string
+          default: bf16
+          description: >-
+            (Optional) Data type for model parameters (bf16, fp16, fp32)
+      additionalProperties: false
+      required:
+        - n_epochs
+        - max_steps_per_epoch
+        - gradient_accumulation_steps
+      title: TrainingConfig
+      description: >-
+        Comprehensive configuration for the training process.
+    PreferenceOptimizeRequest:
+      type: object
+      properties:
+        job_uuid:
+          type: string
+          description: The UUID of the job to create.
+        finetuned_model:
+          type: string
+          description: The model to fine-tune.
+        algorithm_config:
+          $ref: '#/components/schemas/DPOAlignmentConfig'
+          description: The algorithm configuration.
+        training_config:
+          $ref: '#/components/schemas/TrainingConfig'
+          description: The training configuration.
+        hyperparam_search_config:
+          type: object
+          additionalProperties:
+            oneOf:
+              - type: 'null'
+              - type: boolean
+              - type: number
+              - type: string
+              - type: array
+              - type: object
+          description: The hyperparam search configuration.
+        logger_config:
+          type: object
+          additionalProperties:
+            oneOf:
+              - type: 'null'
+              - type: boolean
+              - type: number
+              - type: string
+              - type: array
+              - type: object
+          description: The logger configuration.
+      additionalProperties: false
+      required:
+        - job_uuid
+        - finetuned_model
+        - algorithm_config
+        - training_config
+        - hyperparam_search_config
+        - logger_config
+      title: PreferenceOptimizeRequest
+    PostTrainingJob:
+      type: object
+      properties:
+        job_uuid:
+          type: string
+      additionalProperties: false
+      required:
+        - job_uuid
+      title: PostTrainingJob
+    AlgorithmConfig:
+      oneOf:
+        - $ref: '#/components/schemas/LoraFinetuningConfig'
+        - $ref: '#/components/schemas/QATFinetuningConfig'
+      discriminator:
+        propertyName: type
+        mapping:
+          LoRA: '#/components/schemas/LoraFinetuningConfig'
+          QAT: '#/components/schemas/QATFinetuningConfig'
+    LoraFinetuningConfig:
+      type: object
+      properties:
+        type:
+          type: string
+          const: LoRA
+          default: LoRA
+          description: Algorithm type identifier, always "LoRA"
+        lora_attn_modules:
+          type: array
+          items:
+            type: string
+          description: >-
+            List of attention module names to apply LoRA to
+        apply_lora_to_mlp:
+          type: boolean
+          description: Whether to apply LoRA to MLP layers
+        apply_lora_to_output:
+          type: boolean
+          description: >-
+            Whether to apply LoRA to output projection layers
+        rank:
+          type: integer
+          description: >-
+            Rank of the LoRA adaptation (lower rank = fewer parameters)
+        alpha:
+          type: integer
+          description: >-
+            LoRA scaling parameter that controls adaptation strength
+        use_dora:
+          type: boolean
+          default: false
+          description: >-
+            (Optional) Whether to use DoRA (Weight-Decomposed Low-Rank Adaptation)
+        quantize_base:
+          type: boolean
+          default: false
+          description: >-
+            (Optional) Whether to quantize the base model weights
+      additionalProperties: false
+      required:
+        - type
+        - lora_attn_modules
+        - apply_lora_to_mlp
+        - apply_lora_to_output
+        - rank
+        - alpha
+      title: LoraFinetuningConfig
+      description: >-
+        Configuration for Low-Rank Adaptation (LoRA) fine-tuning.
+    QATFinetuningConfig:
+      type: object
+      properties:
+        type:
+          type: string
+          const: QAT
+          default: QAT
+          description: Algorithm type identifier, always "QAT"
+        quantizer_name:
+          type: string
+          description: >-
+            Name of the quantization algorithm to use
+        group_size:
+          type: integer
+          description: Size of groups for grouped quantization
+      additionalProperties: false
+      required:
+        - type
+        - quantizer_name
+        - group_size
+      title: QATFinetuningConfig
+      description: >-
+        Configuration for Quantization-Aware Training (QAT) fine-tuning.
+    SupervisedFineTuneRequest:
+      type: object
+      properties:
+        job_uuid:
+          type: string
+          description: The UUID of the job to create.
+        training_config:
+          $ref: '#/components/schemas/TrainingConfig'
+          description: The training configuration.
+        hyperparam_search_config:
+          type: object
+          additionalProperties:
+            oneOf:
+              - type: 'null'
+              - type: boolean
+              - type: number
+              - type: string
+              - type: array
+              - type: object
+          description: The hyperparam search configuration.
+        logger_config:
+          type: object
+          additionalProperties:
+            oneOf:
+              - type: 'null'
+              - type: boolean
+              - type: number
+              - type: string
+              - type: array
+              - type: object
+          description: The logger configuration.
+        model:
+          type: string
+          description: The model to fine-tune.
+        checkpoint_dir:
+          type: string
+          description: The directory to save checkpoint(s) to.
+        algorithm_config:
+          $ref: '#/components/schemas/AlgorithmConfig'
+          description: The algorithm configuration.
+      additionalProperties: false
+      required:
+        - job_uuid
+        - training_config
+        - hyperparam_search_config
+        - logger_config
+      title: SupervisedFineTuneRequest
+  responses:
+    BadRequest400:
+      description: The request was invalid or malformed
+      content:
+        application/json:
+          schema:
+            $ref: '#/components/schemas/Error'
+          example:
+            status: 400
+            title: Bad Request
+            detail: The request was invalid or malformed
+    TooManyRequests429:
+      description: >-
+        The client has sent too many requests in a given amount of time
+      content:
+        application/json:
+          schema:
+            $ref: '#/components/schemas/Error'
+          example:
+            status: 429
+            title: Too Many Requests
+            detail: >-
+              You have exceeded the rate limit. Please try again later.
+    InternalServerError500:
+      description: >-
+        The server encountered an unexpected error
+      content:
+        application/json:
+          schema:
+            $ref: '#/components/schemas/Error'
+          example:
+            status: 500
+            title: Internal Server Error
+            detail: >-
+              An unexpected error occurred. Our team has been notified.
+    DefaultError:
+      description: An unexpected error occurred
+      content:
+        application/json:
+          schema:
+            $ref: '#/components/schemas/Error'
+          example:
+            status: 0
+            title: Error
+            detail: An unexpected error occurred
+security:
+  - Default: []
+tags:
+  - name: Agents
+    description: >-
+      APIs for creating and interacting with agentic systems.
+    x-displayName: Agents
+  - name: Benchmarks
+    description: ''
+  - name: Conversations
+    description: >-
+      Protocol for conversation management operations.
+    x-displayName: Conversations
+  - name: DatasetIO
+    description: ''
+  - name: Datasets
+    description: ''
+  - name: Eval
+    description: >-
+      Llama Stack Evaluation API for running evaluations on model and agent candidates.
+    x-displayName: Evaluations
+  - name: Files
+    description: >-
+      This API is used to upload documents that can be used with other Llama Stack
+      APIs.
+    x-displayName: Files
+  - name: Inference
+    description: >-
+      Llama Stack Inference API for generating completions, chat completions, and
+      embeddings.
+
+
+      This API provides the raw interface to the underlying models. Two kinds of models
+      are supported:
+
+      - LLM models: these models generate "raw" and "chat" (conversational) completions.
+
+      - Embedding models: these models generate embeddings to be used for semantic
+      search.
+    x-displayName: Inference
+  - name: Inspect
+    description: >-
+      APIs for inspecting the Llama Stack service, including health status, available
+      API routes with methods and implementing providers.
+    x-displayName: Inspect
+  - name: Models
+    description: ''
+  - name: PostTraining (Coming Soon)
+    description: ''
+  - name: Prompts
+    description: >-
+      Protocol for prompt management operations.
+    x-displayName: Prompts
+  - name: Providers
+    description: >-
+      Providers API for inspecting, listing, and modifying providers and their configurations.
+    x-displayName: Providers
+  - name: Safety
+    description: OpenAI-compatible Moderations API.
+    x-displayName: Safety
+  - name: Scoring
+    description: ''
+  - name: ScoringFunctions
+    description: ''
+  - name: Shields
+    description: ''
+  - name: SyntheticDataGeneration (Coming Soon)
+    description: ''
+  - name: ToolGroups
+    description: ''
+  - name: ToolRuntime
+    description: ''
+  - name: VectorIO
+    description: ''
+x-tagGroups:
+  - name: Operations
+    tags:
+      - Agents
+      - Benchmarks
+      - Conversations
+      - DatasetIO
+      - Datasets
+      - Eval
+      - Files
+      - Inference
+      - Inspect
+      - Models
+      - PostTraining (Coming Soon)
+      - Prompts
+      - Providers
+      - Safety
+      - Scoring
+      - ScoringFunctions
+      - Shields
+      - SyntheticDataGeneration (Coming Soon)
+      - ToolGroups
+      - ToolRuntime
+      - VectorIO
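For orientation, here is a minimal sketch of a request body that validates against the `SupervisedFineTuneRequest`, `LoraFinetuningConfig`, and `TrainingConfig` schemas added above. Only the field names, types, and required fields come from the schemas; the job UUID, model id, and checkpoint directory are placeholder assumptions, and the endpoint to post this payload to is not part of this patch.

```python
# Hypothetical payload for the post-training API; structure follows the
# SupervisedFineTuneRequest / LoraFinetuningConfig / TrainingConfig schemas
# defined in this spec. All concrete values are placeholders.
supervised_finetune_request = {
    "job_uuid": "job-1234",
    "model": "meta-llama/Llama-3.2-3B-Instruct",  # assumption: any registered model id
    "checkpoint_dir": "/tmp/checkpoints",
    "algorithm_config": {
        "type": "LoRA",  # discriminator selects LoraFinetuningConfig
        "lora_attn_modules": ["q_proj", "v_proj"],
        "apply_lora_to_mlp": False,
        "apply_lora_to_output": False,
        "rank": 8,    # lower rank = fewer trainable parameters
        "alpha": 16,  # scaling parameter controlling adaptation strength
    },
    "training_config": {
        "n_epochs": 1,
        "max_steps_per_epoch": 100,
        "gradient_accumulation_steps": 1,
        "dtype": "bf16",
    },
    "hyperparam_search_config": {},
    "logger_config": {},
}
```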
diff --git a/containers/Containerfile b/containers/Containerfile
index c6e47fa1d..1ddf102af 100644
--- a/containers/Containerfile
+++ b/containers/Containerfile
@@ -60,6 +60,17 @@ ENV RUN_CONFIG_PATH=${RUN_CONFIG_PATH}
 # Copy the repository so editable installs and run configurations are available.
 COPY . /workspace
 
+# Install the client package if it is provided
+# NOTE: this is installed before llama-stack since llama-stack depends on llama-stack-client-python
+RUN set -eux; \
+    if [ -n "$LLAMA_STACK_CLIENT_DIR" ]; then \
+        if [ ! -d "$LLAMA_STACK_CLIENT_DIR" ]; then \
+            echo "LLAMA_STACK_CLIENT_DIR is set but $LLAMA_STACK_CLIENT_DIR does not exist" >&2; \
+            exit 1; \
+        fi; \
+        uv pip install --no-cache-dir -e "$LLAMA_STACK_CLIENT_DIR"; \
+    fi;
+
 # Install llama-stack
 RUN set -eux; \
     if [ "$INSTALL_MODE" = "editable" ]; then \
@@ -83,16 +94,6 @@ RUN set -eux; \
         fi; \
     fi;
 
-# Install the client package if it is provided
-RUN set -eux; \
-    if [ -n "$LLAMA_STACK_CLIENT_DIR" ]; then \
-        if [ ! -d "$LLAMA_STACK_CLIENT_DIR" ]; then \
-            echo "LLAMA_STACK_CLIENT_DIR is set but $LLAMA_STACK_CLIENT_DIR does not exist" >&2; \
-            exit 1; \
-        fi; \
-        uv pip install --no-cache-dir -e "$LLAMA_STACK_CLIENT_DIR"; \
-    fi;
-
 # Install the dependencies for the distribution
 RUN set -eux; \
     if [ -z "$DISTRO_NAME" ]; then \
diff --git a/docs/docs/building_applications/rag.mdx b/docs/docs/building_applications/rag.mdx
index 8307448be..b1681dc62 100644
--- a/docs/docs/building_applications/rag.mdx
+++ b/docs/docs/building_applications/rag.mdx
@@ -88,18 +88,19 @@ Llama Stack provides OpenAI-compatible RAG capabilities through:
 To enable automatic vector store creation without specifying embedding models, configure a default embedding model in your run.yaml like so:
 
 ```yaml
-models:
-  - model_id: nomic-ai/nomic-embed-text-v1.5
-    provider_id: inline::sentence-transformers
-    metadata:
-      embedding_dimension: 768
-      default_configured: true
+vector_stores:
+  default_provider_id: faiss
+  default_embedding_model:
+    provider_id: sentence-transformers
+    model_id: nomic-ai/nomic-embed-text-v1.5
 ```
 
 With this configuration:
-- `client.vector_stores.create()` works without requiring embedding model parameters
-- The system automatically uses the default model and its embedding dimension for any newly created vector store
-- Only one model can be marked as `default_configured: true`
+- `client.vector_stores.create()` works without requiring embedding model or provider parameters
+- The system automatically uses the default vector store provider (`faiss`) when multiple providers are available
+- The system automatically uses the default embedding model (`sentence-transformers/nomic-ai/nomic-embed-text-v1.5`) for any newly created vector store
+- The `default_provider_id` specifies which vector storage backend to use
+- The `default_embedding_model` specifies both the inference provider and model for embeddings
 
 ## Vector Store Operations
 
@@ -108,14 +109,15 @@ With this configuration:
 You can create vector stores with automatic or explicit embedding model selection:
 
 ```python
-# Automatic - uses default configured embedding model
+# Automatic - uses default configured embedding model and vector store provider
 vs = client.vector_stores.create()
 
-# Explicit - specify embedding model when you need a specific one
+# Explicit - specify embedding model and/or provider when you need specific ones
 vs = client.vector_stores.create(
     extra_body={
-        "embedding_model": "nomic-ai/nomic-embed-text-v1.5",
-        "embedding_dimension": 768
+        "provider_id": "faiss",  # Optional: specify vector store provider
+        "embedding_model": "sentence-transformers/nomic-ai/nomic-embed-text-v1.5",
+        "embedding_dimension": 768  # Optional: will be auto-detected if not provided
    }
 )
 ```
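Building on the doc change above, a usage sketch of the same OpenAI-compatible surface end to end: create a store with the configured defaults, attach an uploaded file, then search. The `files.create`, `vector_stores.files.create`, and `vector_stores.search` calls follow the OpenAI-compatible API shape; treat exact parameter names as assumptions to verify against the installed client version.

```python
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")

# Uses the default provider and embedding model from run.yaml
vs = client.vector_stores.create()

# Upload a document and attach it to the store
with open("manual.pdf", "rb") as f:
    uploaded = client.files.create(file=f, purpose="assistants")
client.vector_stores.files.create(vector_store_id=vs.id, file_id=uploaded.id)

# Semantic search against the store
results = client.vector_stores.search(
    vector_store_id=vs.id,
    query="How do I reset the device?",
)
for row in results.data:
    print(row)
```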
diff --git a/docs/docs/distributions/configuration.mdx b/docs/docs/distributions/configuration.mdx
index 81243c97b..bf3156865 100644
--- a/docs/docs/distributions/configuration.mdx
+++ b/docs/docs/distributions/configuration.mdx
@@ -44,18 +44,32 @@ providers:
   - provider_id: meta-reference
     provider_type: inline::meta-reference
     config:
-      persistence_store:
-        type: sqlite
-        namespace: null
-        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/agents_store.db
+      persistence:
+        agent_state:
+          backend: kv_default
+          namespace: agents
+        responses:
+          backend: sql_default
+          table_name: responses
   telemetry:
   - provider_id: meta-reference
     provider_type: inline::meta-reference
     config: {}
-metadata_store:
-  namespace: null
-  type: sqlite
-  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/registry.db
+storage:
+  backends:
+    kv_default:
+      type: kv_sqlite
+      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/kvstore.db
+    sql_default:
+      type: sql_sqlite
+      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/sqlstore.db
+  references:
+    metadata:
+      backend: kv_default
+      namespace: registry
+    inference:
+      backend: sql_default
+      table_name: inference_store
 models:
 - metadata: {}
   model_id: ${env.INFERENCE_MODEL}
diff --git a/docs/docs/distributions/k8s/stack-configmap.yaml b/docs/docs/distributions/k8s/stack-configmap.yaml
index 3dbb0da97..c71ab05d8 100644
--- a/docs/docs/distributions/k8s/stack-configmap.yaml
+++ b/docs/docs/distributions/k8s/stack-configmap.yaml
@@ -1,56 +1,155 @@
 apiVersion: v1
 data:
-  stack_run_config.yaml: "version: '2'\nimage_name: kubernetes-demo\napis:\n- agents\n-
-    inference\n- files\n- safety\n- telemetry\n- tool_runtime\n- vector_io\nproviders:\n
-    \ inference:\n  - provider_id: vllm-inference\n    provider_type: remote::vllm\n
-    \ config:\n      url: ${env.VLLM_URL:=http://localhost:8000/v1}\n      max_tokens:
-    ${env.VLLM_MAX_TOKENS:=4096}\n      api_token: ${env.VLLM_API_TOKEN:=fake}\n      tls_verify:
-    ${env.VLLM_TLS_VERIFY:=true}\n  - provider_id: vllm-safety\n    provider_type:
-    remote::vllm\n    config:\n      url: ${env.VLLM_SAFETY_URL:=http://localhost:8000/v1}\n
-    \ max_tokens: ${env.VLLM_MAX_TOKENS:=4096}\n      api_token: ${env.VLLM_API_TOKEN:=fake}\n
-    \ tls_verify: ${env.VLLM_TLS_VERIFY:=true}\n  - provider_id: sentence-transformers\n
-    \ provider_type: inline::sentence-transformers\n    config: {}\n  vector_io:\n
-    \ - provider_id: ${env.ENABLE_CHROMADB:+chromadb}\n    provider_type: remote::chromadb\n
-    \ config:\n      url: ${env.CHROMADB_URL:=}\n      kvstore:\n        type: postgres\n
-    \ host: ${env.POSTGRES_HOST:=localhost}\n        port: ${env.POSTGRES_PORT:=5432}\n
-    \ db: ${env.POSTGRES_DB:=llamastack}\n        user: ${env.POSTGRES_USER:=llamastack}\n
-    \ password: ${env.POSTGRES_PASSWORD:=llamastack}\n  files:\n  - provider_id:
-    meta-reference-files\n    provider_type: inline::localfs\n    config:\n      storage_dir:
-    ${env.FILES_STORAGE_DIR:=~/.llama/distributions/starter/files}\n      metadata_store:\n
-    \ type: sqlite\n        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/files_metadata.db
-    \ \n  safety:\n  - provider_id: llama-guard\n    provider_type: inline::llama-guard\n
-    \ config:\n      excluded_categories: []\n  agents:\n  - provider_id: meta-reference\n
-    \ provider_type: inline::meta-reference\n    config:\n      persistence_store:\n
-    \ type: postgres\n        host: ${env.POSTGRES_HOST:=localhost}\n        port:
-    ${env.POSTGRES_PORT:=5432}\n        db: ${env.POSTGRES_DB:=llamastack}\n        user:
-    ${env.POSTGRES_USER:=llamastack}\n        password: ${env.POSTGRES_PASSWORD:=llamastack}\n
-    \ responses_store:\n        type: postgres\n        host: ${env.POSTGRES_HOST:=localhost}\n
-    \ port: ${env.POSTGRES_PORT:=5432}\n        db: ${env.POSTGRES_DB:=llamastack}\n
-    \ user: ${env.POSTGRES_USER:=llamastack}\n        password: ${env.POSTGRES_PASSWORD:=llamastack}\n
-    \ telemetry:\n  - provider_id: meta-reference\n    provider_type: inline::meta-reference\n
-    \ config:\n      service_name: \"${env.OTEL_SERVICE_NAME:=\\u200B}\"\n      sinks:
-    ${env.TELEMETRY_SINKS:=console}\n  tool_runtime:\n  - provider_id: brave-search\n
-    \ provider_type: remote::brave-search\n    config:\n      api_key: ${env.BRAVE_SEARCH_API_KEY:+}\n
-    \ max_results: 3\n  - provider_id: tavily-search\n    provider_type: remote::tavily-search\n
-    \ config:\n      api_key: ${env.TAVILY_SEARCH_API_KEY:+}\n      max_results:
-    3\n  - provider_id: rag-runtime\n    provider_type: inline::rag-runtime\n    config:
-    {}\n  - provider_id: model-context-protocol\n    provider_type: remote::model-context-protocol\n
-    \ config: {}\nmetadata_store:\n  type: postgres\n  host: ${env.POSTGRES_HOST:=localhost}\n
-    \ port: ${env.POSTGRES_PORT:=5432}\n  db: ${env.POSTGRES_DB:=llamastack}\n  user:
-    ${env.POSTGRES_USER:=llamastack}\n  password: ${env.POSTGRES_PASSWORD:=llamastack}\n
-    \ table_name: llamastack_kvstore\ninference_store:\n  type: postgres\n  host:
-    ${env.POSTGRES_HOST:=localhost}\n  port: ${env.POSTGRES_PORT:=5432}\n  db: ${env.POSTGRES_DB:=llamastack}\n
-    \ user: ${env.POSTGRES_USER:=llamastack}\n  password: ${env.POSTGRES_PASSWORD:=llamastack}\nmodels:\n-
-    metadata:\n    embedding_dimension: 384\n    model_id: all-MiniLM-L6-v2\n    provider_id:
-    sentence-transformers\n    model_type: embedding\n- metadata: {}\n  model_id: ${env.INFERENCE_MODEL}\n
-    \ provider_id: vllm-inference\n  model_type: llm\n- metadata: {}\n  model_id:
-    ${env.SAFETY_MODEL:=meta-llama/Llama-Guard-3-1B}\n  provider_id: vllm-safety\n
-    \ model_type: llm\nshields:\n- shield_id: ${env.SAFETY_MODEL:=meta-llama/Llama-Guard-3-1B}\nvector_dbs:
-    []\ndatasets: []\nscoring_fns: []\nbenchmarks: []\ntool_groups:\n- toolgroup_id:
-    builtin::websearch\n  provider_id: tavily-search\n- toolgroup_id: builtin::rag\n
-    \ provider_id: rag-runtime\nserver:\n  port: 8321\n  auth:\n    provider_config:\n
-    \ type: github_token\n"
+  stack_run_config.yaml: |
+    version: '2'
+    image_name: kubernetes-demo
+    apis:
+    - agents
+    - inference
+    - files
+    - safety
+    - telemetry
+    - tool_runtime
+    - vector_io
+    providers:
+      inference:
+      - provider_id: vllm-inference
+        provider_type: remote::vllm
+        config:
+          url: ${env.VLLM_URL:=http://localhost:8000/v1}
+          max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
+          api_token: ${env.VLLM_API_TOKEN:=fake}
+          tls_verify: ${env.VLLM_TLS_VERIFY:=true}
+      - provider_id: vllm-safety
+        provider_type: remote::vllm
+        config:
+          url: ${env.VLLM_SAFETY_URL:=http://localhost:8000/v1}
+          max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
+          api_token: ${env.VLLM_API_TOKEN:=fake}
+          tls_verify: ${env.VLLM_TLS_VERIFY:=true}
+      - provider_id: sentence-transformers
+        provider_type: inline::sentence-transformers
+        config: {}
+      vector_io:
+      - provider_id: ${env.ENABLE_CHROMADB:+chromadb}
+        provider_type: remote::chromadb
+        config:
+          url: ${env.CHROMADB_URL:=}
+          kvstore:
+            type: postgres
+            host: ${env.POSTGRES_HOST:=localhost}
+            port: ${env.POSTGRES_PORT:=5432}
+            db: ${env.POSTGRES_DB:=llamastack}
+            user: ${env.POSTGRES_USER:=llamastack}
+            password: ${env.POSTGRES_PASSWORD:=llamastack}
+      files:
+      - provider_id: meta-reference-files
+        provider_type: inline::localfs
+        config:
+          storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/starter/files}
+          metadata_store:
+            type: sqlite
+            db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/files_metadata.db
+      safety:
+      - provider_id: llama-guard
+        provider_type: inline::llama-guard
+        config:
+          excluded_categories: []
+      agents:
+      - provider_id: meta-reference
+        provider_type: inline::meta-reference
+        config:
+          persistence_store:
+            type: postgres
+            host: ${env.POSTGRES_HOST:=localhost}
+            port: ${env.POSTGRES_PORT:=5432}
+            db: ${env.POSTGRES_DB:=llamastack}
+            user: ${env.POSTGRES_USER:=llamastack}
+            password: ${env.POSTGRES_PASSWORD:=llamastack}
+          responses_store:
+            type: postgres
+            host: ${env.POSTGRES_HOST:=localhost}
+            port: ${env.POSTGRES_PORT:=5432}
+            db: ${env.POSTGRES_DB:=llamastack}
+            user: ${env.POSTGRES_USER:=llamastack}
+            password: ${env.POSTGRES_PASSWORD:=llamastack}
+      telemetry:
+      - provider_id: meta-reference
+        provider_type: inline::meta-reference
+        config:
+          service_name: "${env.OTEL_SERVICE_NAME:=\u200B}"
+          sinks: ${env.TELEMETRY_SINKS:=console}
+      tool_runtime:
+      - provider_id: brave-search
+        provider_type: remote::brave-search
+        config:
+          api_key: ${env.BRAVE_SEARCH_API_KEY:+}
+          max_results: 3
+      - provider_id: tavily-search
+        provider_type: remote::tavily-search
+        config:
+          api_key: ${env.TAVILY_SEARCH_API_KEY:+}
+          max_results: 3
+      - provider_id: rag-runtime
+        provider_type: inline::rag-runtime
+        config: {}
+      - provider_id: model-context-protocol
+        provider_type: remote::model-context-protocol
+        config: {}
+    storage:
+      backends:
+        kv_default:
+          type: kv_postgres
+          host: ${env.POSTGRES_HOST:=localhost}
+          port: ${env.POSTGRES_PORT:=5432}
+          db: ${env.POSTGRES_DB:=llamastack}
+          user: ${env.POSTGRES_USER:=llamastack}
+          password: ${env.POSTGRES_PASSWORD:=llamastack}
+          table_name: ${env.POSTGRES_TABLE_NAME:=llamastack_kvstore}
+        sql_default:
+          type: sql_postgres
+          host: ${env.POSTGRES_HOST:=localhost}
+          port: ${env.POSTGRES_PORT:=5432}
+          db: ${env.POSTGRES_DB:=llamastack}
+          user: ${env.POSTGRES_USER:=llamastack}
+          password: ${env.POSTGRES_PASSWORD:=llamastack}
+      references:
+        metadata:
+          backend: kv_default
+          namespace: registry
+        inference:
+          backend: sql_default
+          table_name: inference_store
+    models:
+    - metadata:
+        embedding_dimension: 768
+      model_id: nomic-embed-text-v1.5
+      provider_id: sentence-transformers
+      model_type: embedding
+    - metadata: {}
+      model_id: ${env.INFERENCE_MODEL}
+      provider_id: vllm-inference
+      model_type: llm
+    - metadata: {}
+      model_id: ${env.SAFETY_MODEL:=meta-llama/Llama-Guard-3-1B}
+      provider_id: vllm-safety
+      model_type: llm
+    shields:
+    - shield_id: ${env.SAFETY_MODEL:=meta-llama/Llama-Guard-3-1B}
+    vector_dbs: []
+    datasets: []
+    scoring_fns: []
+    benchmarks: []
+    tool_groups:
+    - toolgroup_id: builtin::websearch
+      provider_id: tavily-search
+    - toolgroup_id: builtin::rag
+      provider_id: rag-runtime
+    server:
+      port: 8321
+      auth:
+        provider_config:
+          type: github_token
 kind: ConfigMap
 metadata:
-  creationTimestamp: null
   name: llama-stack-config
diff --git a/docs/docs/distributions/k8s/stack_run_config.yaml b/docs/docs/distributions/k8s/stack_run_config.yaml
index ee28a1ea8..863565fdf 100644
--- a/docs/docs/distributions/k8s/stack_run_config.yaml
+++ b/docs/docs/distributions/k8s/stack_run_config.yaml
@@ -93,21 +93,30 @@ providers:
   - provider_id: model-context-protocol
     provider_type: remote::model-context-protocol
     config: {}
-metadata_store:
-  type: postgres
-  host: ${env.POSTGRES_HOST:=localhost}
-  port: ${env.POSTGRES_PORT:=5432}
-  db: ${env.POSTGRES_DB:=llamastack}
-  user: ${env.POSTGRES_USER:=llamastack}
-  password: ${env.POSTGRES_PASSWORD:=llamastack}
-  table_name: llamastack_kvstore
-inference_store:
-  type: postgres
-  host: ${env.POSTGRES_HOST:=localhost}
-  port: ${env.POSTGRES_PORT:=5432}
-  db: ${env.POSTGRES_DB:=llamastack}
-  user: ${env.POSTGRES_USER:=llamastack}
-  password: ${env.POSTGRES_PASSWORD:=llamastack}
+storage:
+  backends:
+    kv_default:
+      type: kv_postgres
+      host: ${env.POSTGRES_HOST:=localhost}
+      port: ${env.POSTGRES_PORT:=5432}
+      db: ${env.POSTGRES_DB:=llamastack}
+      user: ${env.POSTGRES_USER:=llamastack}
+      password: ${env.POSTGRES_PASSWORD:=llamastack}
+      table_name: ${env.POSTGRES_TABLE_NAME:=llamastack_kvstore}
+    sql_default:
+      type: sql_postgres
+      host: ${env.POSTGRES_HOST:=localhost}
+      port: ${env.POSTGRES_PORT:=5432}
+      db: ${env.POSTGRES_DB:=llamastack}
+      user: ${env.POSTGRES_USER:=llamastack}
+      password: ${env.POSTGRES_PASSWORD:=llamastack}
+  references:
+    metadata:
+      backend: kv_default
+      namespace: registry
+    inference:
+      backend: sql_default
+      table_name: inference_store
 models:
 - metadata:
     embedding_dimension: 768
diff --git a/docs/docs/distributions/self_hosted_distro/meta-reference-gpu.md b/docs/docs/distributions/self_hosted_distro/meta-reference-gpu.md
index b7134b3e1..666850976 100644
--- a/docs/docs/distributions/self_hosted_distro/meta-reference-gpu.md
+++ b/docs/docs/distributions/self_hosted_distro/meta-reference-gpu.md
@@ -21,7 +21,7 @@ The `llamastack/distribution-meta-reference-gpu` distribution consists of the fo
 | inference | `inline::meta-reference` |
 | safety | `inline::llama-guard` |
 | scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
-| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::rag-runtime`, `remote::model-context-protocol` |
+| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `remote::model-context-protocol` |
 | vector_io | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |
diff --git a/docs/docs/distributions/self_hosted_distro/nvidia.md b/docs/docs/distributions/self_hosted_distro/nvidia.md
index 4a7d99ff5..b1de9ddb8 100644
--- a/docs/docs/distributions/self_hosted_distro/nvidia.md
+++ b/docs/docs/distributions/self_hosted_distro/nvidia.md
@@ -16,7 +16,7 @@ The `llamastack/distribution-nvidia` distribution consists of the following prov
 | post_training | `remote::nvidia` |
 | safety | `remote::nvidia` |
 | scoring | `inline::basic` |
-| tool_runtime | `inline::rag-runtime` |
+| tool_runtime | |
 | vector_io | `inline::faiss` |
diff --git a/docs/docs/providers/agents/inline_meta-reference.mdx b/docs/docs/providers/agents/inline_meta-reference.mdx
index fd961745f..fac9b8406 100644
--- a/docs/docs/providers/agents/inline_meta-reference.mdx
+++ b/docs/docs/providers/agents/inline_meta-reference.mdx
@@ -14,16 +14,18 @@ Meta's reference implementation of an agent system that can use tools, access ve
 
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
-| `persistence_store` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | |
-| `responses_store` | `utils.sqlstore.sqlstore.SqliteSqlStoreConfig \| utils.sqlstore.sqlstore.PostgresSqlStoreConfig` | No | sqlite | |
+| `persistence` | `` | No | | |
 
 ## Sample Configuration
 
 ```yaml
-persistence_store:
-  type: sqlite
-  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/agents_store.db
-responses_store:
-  type: sqlite
-  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/responses_store.db
+persistence:
+  agent_state:
+    namespace: agents
+    backend: kv_default
+  responses:
+    table_name: responses
+    backend: sql_default
+    max_write_queue_size: 10000
+    num_writers: 4
 ```
diff --git a/docs/docs/providers/batches/inline_reference.mdx b/docs/docs/providers/batches/inline_reference.mdx
index f43800555..45304fbb1 100644
--- a/docs/docs/providers/batches/inline_reference.mdx
+++ b/docs/docs/providers/batches/inline_reference.mdx
@@ -14,7 +14,7 @@ Reference implementation of batches API with KVStore persistence.
 
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
-| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | Configuration for the key-value store backend. |
+| `kvstore` | `` | No | | Configuration for the key-value store backend. |
 | `max_concurrent_batches` | `` | No | 1 | Maximum number of concurrent batches to process simultaneously. |
 | `max_concurrent_requests_per_batch` | `` | No | 10 | Maximum number of concurrent requests to process per batch. |
 
@@ -22,6 +22,6 @@ Reference implementation of batches API with KVStore persistence.
 
 ```yaml
 kvstore:
-  type: sqlite
-  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/batches.db
+  namespace: batches
+  backend: kv_default
 ```
diff --git a/docs/docs/providers/datasetio/inline_localfs.mdx b/docs/docs/providers/datasetio/inline_localfs.mdx
index b02a3a3bd..a9363376c 100644
--- a/docs/docs/providers/datasetio/inline_localfs.mdx
+++ b/docs/docs/providers/datasetio/inline_localfs.mdx
@@ -14,12 +14,12 @@ Local filesystem-based dataset I/O provider for reading and writing datasets to
 
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
-| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | |
+| `kvstore` | `` | No | | |
 
 ## Sample Configuration
 
 ```yaml
 kvstore:
-  type: sqlite
-  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/localfs_datasetio.db
+  namespace: datasetio::localfs
+  backend: kv_default
 ```
diff --git a/docs/docs/providers/datasetio/remote_huggingface.mdx b/docs/docs/providers/datasetio/remote_huggingface.mdx
index 82597d999..de3ffaaa6 100644
--- a/docs/docs/providers/datasetio/remote_huggingface.mdx
+++ b/docs/docs/providers/datasetio/remote_huggingface.mdx
@@ -14,12 +14,12 @@ HuggingFace datasets provider for accessing and managing datasets from the Huggi
 
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
-| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | |
+| `kvstore` | `` | No | | |
 
 ## Sample Configuration
 
 ```yaml
 kvstore:
-  type: sqlite
-  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/huggingface_datasetio.db
+  namespace: datasetio::huggingface
+  backend: kv_default
 ```
diff --git a/docs/docs/providers/eval/inline_meta-reference.mdx b/docs/docs/providers/eval/inline_meta-reference.mdx
index b0eb589e0..2c86c18c9 100644
--- a/docs/docs/providers/eval/inline_meta-reference.mdx
+++ b/docs/docs/providers/eval/inline_meta-reference.mdx
@@ -14,12 +14,12 @@ Meta's reference implementation of evaluation tasks with support for multiple la
 
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
-| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | |
+| `kvstore` | `` | No | | |
 
 ## Sample Configuration
 
 ```yaml
 kvstore:
-  type: sqlite
-  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/meta_reference_eval.db
+  namespace: eval
+  backend: kv_default
 ```
diff --git a/docs/docs/providers/files/inline_localfs.mdx b/docs/docs/providers/files/inline_localfs.mdx
index 86d141f93..bff0c4eb9 100644
--- a/docs/docs/providers/files/inline_localfs.mdx
+++ b/docs/docs/providers/files/inline_localfs.mdx
@@ -15,7 +15,7 @@ Local filesystem-based file storage provider for managing files and documents lo
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
 | `storage_dir` | `` | No | | Directory to store uploaded files |
-| `metadata_store` | `utils.sqlstore.sqlstore.SqliteSqlStoreConfig \| utils.sqlstore.sqlstore.PostgresSqlStoreConfig` | No | sqlite | SQL store configuration for file metadata |
+| `metadata_store` | `` | No | | SQL store configuration for file metadata |
 | `ttl_secs` | `` | No | 31536000 | |
 
 ## Sample Configuration
@@ -23,6 +23,6 @@ Local filesystem-based file storage provider for managing files and documents lo
 ```yaml
 storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/dummy/files}
 metadata_store:
-  type: sqlite
-  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/files_metadata.db
+  table_name: files_metadata
+  backend: sql_default
 ```
diff --git a/docs/docs/providers/files/remote_s3.mdx b/docs/docs/providers/files/remote_s3.mdx
index 353cedbfb..65cd545c5 100644
--- a/docs/docs/providers/files/remote_s3.mdx
+++ b/docs/docs/providers/files/remote_s3.mdx
@@ -20,7 +20,7 @@ AWS S3-based file storage provider for scalable cloud file management with metad
 | `aws_secret_access_key` | `str \| None` | No | | AWS secret access key (optional if using IAM roles) |
 | `endpoint_url` | `str \| None` | No | | Custom S3 endpoint URL (for MinIO, LocalStack, etc.) |
 | `auto_create_bucket` | `` | No | False | Automatically create the S3 bucket if it doesn't exist |
-| `metadata_store` | `utils.sqlstore.sqlstore.SqliteSqlStoreConfig \| utils.sqlstore.sqlstore.PostgresSqlStoreConfig` | No | sqlite | SQL store configuration for file metadata |
+| `metadata_store` | `` | No | | SQL store configuration for file metadata |
 
 ## Sample Configuration
 
@@ -32,6 +32,6 @@ aws_secret_access_key: ${env.AWS_SECRET_ACCESS_KEY:=}
 endpoint_url: ${env.S3_ENDPOINT_URL:=}
 auto_create_bucket: ${env.S3_AUTO_CREATE_BUCKET:=false}
 metadata_store:
-  type: sqlite
-  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/s3_files_metadata.db
+  table_name: s3_files_metadata
+  backend: sql_default
 ```
diff --git a/docs/docs/providers/vector_io/inline_chromadb.mdx b/docs/docs/providers/vector_io/inline_chromadb.mdx
index a1858eacc..0be5cd5b3 100644
--- a/docs/docs/providers/vector_io/inline_chromadb.mdx
+++ b/docs/docs/providers/vector_io/inline_chromadb.mdx
@@ -79,13 +79,13 @@ See [Chroma's documentation](https://docs.trychroma.com/docs/overview/introducti
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
 | `db_path` | `` | No | | |
-| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | Config for KV store backend |
+| `persistence` | `` | No | | Config for KV store backend |
 
 ## Sample Configuration
 
 ```yaml
 db_path: ${env.CHROMADB_PATH}
-kvstore:
-  type: sqlite
-  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/chroma_inline_registry.db
+persistence:
+  namespace: vector_io::chroma
+  backend: kv_default
 ```
diff --git a/docs/docs/providers/vector_io/inline_faiss.mdx b/docs/docs/providers/vector_io/inline_faiss.mdx
index 03bc2a928..3a1fba055 100644
--- a/docs/docs/providers/vector_io/inline_faiss.mdx
+++ b/docs/docs/providers/vector_io/inline_faiss.mdx
@@ -95,12 +95,12 @@ more details about Faiss in general.
 
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
-| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | |
+| `persistence` | `` | No | | |
 
 ## Sample Configuration
 
 ```yaml
-kvstore:
-  type: sqlite
-  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/faiss_store.db
+persistence:
+  namespace: vector_io::faiss
+  backend: kv_default
 ```
diff --git a/docs/docs/providers/vector_io/inline_meta-reference.mdx b/docs/docs/providers/vector_io/inline_meta-reference.mdx
index bcad86750..17fd40cf5 100644
--- a/docs/docs/providers/vector_io/inline_meta-reference.mdx
+++ b/docs/docs/providers/vector_io/inline_meta-reference.mdx
@@ -14,14 +14,14 @@ Meta's reference implementation of a vector database.
 
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
-| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | |
+| `persistence` | `` | No | | |
 
 ## Sample Configuration
 
 ```yaml
-kvstore:
-  type: sqlite
-  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/faiss_store.db
+persistence:
+  namespace: vector_io::faiss
+  backend: kv_default
 ```
 
 ## Deprecation Notice
diff --git a/docs/docs/providers/vector_io/inline_milvus.mdx b/docs/docs/providers/vector_io/inline_milvus.mdx
index 7e6f15c81..6063edab1 100644
--- a/docs/docs/providers/vector_io/inline_milvus.mdx
+++ b/docs/docs/providers/vector_io/inline_milvus.mdx
@@ -17,14 +17,14 @@ Please refer to the remote provider documentation.
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
 | `db_path` | `` | No | | |
-| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | Config for KV store backend (SQLite only for now) |
+| `persistence` | `` | No | | Config for KV store backend (SQLite only for now) |
 | `consistency_level` | `` | No | Strong | The consistency level of the Milvus server |
 
 ## Sample Configuration
 
 ```yaml
 db_path: ${env.MILVUS_DB_PATH:=~/.llama/dummy}/milvus.db
-kvstore:
-  type: sqlite
-  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/milvus_registry.db
+persistence:
+  namespace: vector_io::milvus
+  backend: kv_default
 ```
diff --git a/docs/docs/providers/vector_io/inline_qdrant.mdx b/docs/docs/providers/vector_io/inline_qdrant.mdx
index 5c9ab10f2..057d96761 100644
--- a/docs/docs/providers/vector_io/inline_qdrant.mdx
+++ b/docs/docs/providers/vector_io/inline_qdrant.mdx
@@ -98,13 +98,13 @@ See the [Qdrant documentation](https://qdrant.tech/documentation/) for more deta
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
 | `path` | `` | No | | |
-| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | |
+| `persistence` | `` | No | | |
 
 ## Sample Configuration
 
 ```yaml
 path: ${env.QDRANT_PATH:=~/.llama/~/.llama/dummy}/qdrant.db
-kvstore:
-  type: sqlite
-  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/qdrant_registry.db
+persistence:
+  namespace: vector_io::qdrant
+  backend: kv_default
 ```
diff --git a/docs/docs/providers/vector_io/inline_sqlite-vec.mdx b/docs/docs/providers/vector_io/inline_sqlite-vec.mdx
index aa6992a56..459498a59 100644
--- a/docs/docs/providers/vector_io/inline_sqlite-vec.mdx
+++ b/docs/docs/providers/vector_io/inline_sqlite-vec.mdx
@@ -28,7 +28,7 @@ description: |
   #### Empirical Example
 
   Consider the histogram below in which 10,000 randomly generated strings were inserted
-  in batches of 100 into both Faiss and sqlite-vec using `client.tool_runtime.rag_tool.insert()`.
+  in batches of 100 into both Faiss and sqlite-vec.
 
   ```{image} ../../../../_static/providers/vector_io/write_time_comparison_sqlite-vec-faiss.png
   :alt: Comparison of SQLite-Vec and Faiss write times
@@ -233,7 +233,7 @@ Datasets that can fit in memory, frequent reads | Faiss | Optimized for speed, i
 #### Empirical Example
 
 Consider the histogram below in which 10,000 randomly generated strings were inserted
-in batches of 100 into both Faiss and sqlite-vec using `client.tool_runtime.rag_tool.insert()`.
+in batches of 100 into both Faiss and sqlite-vec.
 
 ```{image} ../../../../_static/providers/vector_io/write_time_comparison_sqlite-vec-faiss.png
 :alt: Comparison of SQLite-Vec and Faiss write times
@@ -408,13 +408,13 @@ See [sqlite-vec's GitHub repo](https://github.com/asg017/sqlite-vec/tree/main) f
 
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
 | `db_path` | `` | No | | Path to the SQLite database file |
-| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | Config for KV store backend (SQLite only for now) |
+| `persistence` | `` | No | | Config for KV store backend (SQLite only for now) |
 
 ## Sample Configuration
 
 ```yaml
 db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/sqlite_vec.db
-kvstore:
-  type: sqlite
-  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/sqlite_vec_registry.db
+persistence:
+  namespace: vector_io::sqlite_vec
+  backend: kv_default
 ```
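The "Empirical Example" prose in the sqlite-vec page above describes 10,000 random strings inserted in batches of 100 into Faiss and sqlite-vec. As a minimal sketch of that benchmark shape, the following times per-batch writes; `insert_batch` is a hypothetical stand-in for whichever insertion call your provider exposes, not an API from this patch.

```python
import random
import string
import time

def insert_batch(batch: list[str]) -> None:
    """Hypothetical stand-in for the provider's write path (e.g. Faiss or
    sqlite-vec insertion); replace with the real call before benchmarking."""
    pass

def random_doc(length: int = 100) -> str:
    return "".join(random.choices(string.ascii_lowercase + " ", k=length))

docs = [random_doc() for _ in range(10_000)]
write_times = []
for i in range(0, len(docs), 100):  # batches of 100, as in the histogram
    start = time.perf_counter()
    insert_batch(docs[i : i + 100])
    write_times.append(time.perf_counter() - start)

print(f"mean batch write time: {sum(write_times) / len(write_times):.6f}s")
```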
diff --git a/docs/docs/providers/vector_io/inline_sqlite_vec.mdx b/docs/docs/providers/vector_io/inline_sqlite_vec.mdx
index 7f69f617d..67cbd0021 100644
--- a/docs/docs/providers/vector_io/inline_sqlite_vec.mdx
+++ b/docs/docs/providers/vector_io/inline_sqlite_vec.mdx
@@ -17,15 +17,15 @@ Please refer to the sqlite-vec provider documentation.
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
 | `db_path` | `` | No | | Path to the SQLite database file |
-| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | Config for KV store backend (SQLite only for now) |
+| `persistence` | `` | No | | Config for KV store backend (SQLite only for now) |
 
 ## Sample Configuration
 
 ```yaml
 db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/sqlite_vec.db
-kvstore:
-  type: sqlite
-  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/sqlite_vec_registry.db
+persistence:
+  namespace: vector_io::sqlite_vec
+  backend: kv_default
 ```
 
 ## Deprecation Notice
diff --git a/docs/docs/providers/vector_io/remote_chromadb.mdx b/docs/docs/providers/vector_io/remote_chromadb.mdx
index 807771003..2aee3eeca 100644
--- a/docs/docs/providers/vector_io/remote_chromadb.mdx
+++ b/docs/docs/providers/vector_io/remote_chromadb.mdx
@@ -78,13 +78,13 @@ See [Chroma's documentation](https://docs.trychroma.com/docs/overview/introducti
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
 | `url` | `str \| None` | No | | |
-| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | Config for KV store backend |
+| `persistence` | `` | No | | Config for KV store backend |
 
 ## Sample Configuration
 
 ```yaml
 url: ${env.CHROMADB_URL}
-kvstore:
-  type: sqlite
-  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/chroma_remote_registry.db
+persistence:
+  namespace: vector_io::chroma_remote
+  backend: kv_default
 ```
diff --git a/docs/docs/providers/vector_io/remote_milvus.mdx b/docs/docs/providers/vector_io/remote_milvus.mdx
index 7f7c08122..bf9935d61 100644
--- a/docs/docs/providers/vector_io/remote_milvus.mdx
+++ b/docs/docs/providers/vector_io/remote_milvus.mdx
@@ -408,7 +408,7 @@ For more details on TLS configuration, refer to the [TLS setup guide](https://mi
 | `uri` | `` | No | | The URI of the Milvus server |
 | `token` | `str \| None` | No | | The token of the Milvus server |
 | `consistency_level` | `` | No | Strong | The consistency level of the Milvus server |
-| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | Config for KV store backend |
+| `persistence` | `` | No | | Config for KV store backend |
 | `config` | `dict` | No | `{}` | This configuration allows additional fields to be passed through to the underlying Milvus client. See the [Milvus](https://milvus.io/docs/install-overview.md) documentation for more details about Milvus in general. |
 
 :::note
@@ -420,7 +420,7 @@ This configuration class accepts additional fields beyond those listed above. Yo
 
 ```yaml
 uri: ${env.MILVUS_ENDPOINT}
 token: ${env.MILVUS_TOKEN}
-kvstore:
-  type: sqlite
-  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/milvus_remote_registry.db
+persistence:
+  namespace: vector_io::milvus_remote
+  backend: kv_default
 ```
diff --git a/docs/docs/providers/vector_io/remote_pgvector.mdx b/docs/docs/providers/vector_io/remote_pgvector.mdx
index d21810c68..cb70f35d1 100644
--- a/docs/docs/providers/vector_io/remote_pgvector.mdx
+++ b/docs/docs/providers/vector_io/remote_pgvector.mdx
@@ -218,7 +218,7 @@ See [PGVector's documentation](https://github.com/pgvector/pgvector) for more de
 | `db` | `str \| None` | No | postgres | |
 | `user` | `str \| None` | No | postgres | |
 | `password` | `str \| None` | No | mysecretpassword | |
-| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig, annotation=NoneType, required=False, default='sqlite', discriminator='type'` | No | | Config for KV store backend (SQLite only for now) |
+| `persistence` | `llama_stack.core.storage.datatypes.KVStoreReference \| None` | No | | Config for KV store backend (SQLite only for now) |
 
 ## Sample Configuration
 
@@ -228,7 +228,7 @@ port: ${env.PGVECTOR_PORT:=5432}
 db: ${env.PGVECTOR_DB}
 user: ${env.PGVECTOR_USER}
 password: ${env.PGVECTOR_PASSWORD}
-kvstore:
-  type: sqlite
-  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/pgvector_registry.db
+persistence:
+  namespace: vector_io::pgvector
+  backend: kv_default
 ```
diff --git a/docs/docs/providers/vector_io/remote_qdrant.mdx b/docs/docs/providers/vector_io/remote_qdrant.mdx
index c44a2b937..dff9642b5 100644
--- a/docs/docs/providers/vector_io/remote_qdrant.mdx
+++ b/docs/docs/providers/vector_io/remote_qdrant.mdx
@@ -26,13 +26,13 @@ Please refer to the inline provider documentation.
 | `prefix` | `str \| None` | No | | |
 | `timeout` | `int \| None` | No | | |
 | `host` | `str \| None` | No | | |
-| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | |
+| `persistence` | `` | No | | |
 
 ## Sample Configuration
 
 ```yaml
 api_key: ${env.QDRANT_API_KEY:=}
-kvstore:
-  type: sqlite
-  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/qdrant_registry.db
+persistence:
+  namespace: vector_io::qdrant_remote
+  backend: kv_default
 ```
diff --git a/docs/docs/providers/vector_io/remote_weaviate.mdx b/docs/docs/providers/vector_io/remote_weaviate.mdx
index 3f1e36422..b809bed2e 100644
--- a/docs/docs/providers/vector_io/remote_weaviate.mdx
+++ b/docs/docs/providers/vector_io/remote_weaviate.mdx
@@ -75,14 +75,14 @@ See [Weaviate's documentation](https://weaviate.io/developers/weaviate) for more
 |-------|------|----------|---------|-------------|
 | `weaviate_api_key` | `str \| None` | No | | The API key for the Weaviate instance |
 | `weaviate_cluster_url` | `str \| None` | No | localhost:8080 | The URL of the Weaviate cluster |
-| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig, annotation=NoneType, required=False, default='sqlite', discriminator='type'` | No | | Config for KV store backend (SQLite only for now) |
+| `persistence` | `llama_stack.core.storage.datatypes.KVStoreReference \| None` | No | | Config for KV store backend (SQLite only for now) |
 
 ## Sample Configuration
 
 ```yaml
 weaviate_api_key: null
 weaviate_cluster_url: ${env.WEAVIATE_CLUSTER_URL:=localhost:8080}
-kvstore:
-  type: sqlite
-  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/weaviate_registry.db
+persistence:
+  namespace: vector_io::weaviate
+  backend: kv_default
 ```
diff --git a/docs/docs/references/llama_stack_client_cli_reference.md b/docs/docs/references/llama_stack_client_cli_reference.md
index 9bb514a2d..a4321938a 100644
--- a/docs/docs/references/llama_stack_client_cli_reference.md
+++ b/docs/docs/references/llama_stack_client_cli_reference.md
@@ -32,7 +32,6 @@ Commands:
   scoring_functions Manage scoring functions.
   shields Manage safety shield services.
   toolgroups Manage available tool groups.
-  vector_dbs Manage vector databases.
 ```
 
 ### `llama-stack-client configure`
@@ -211,53 +210,6 @@ Unregister a model from distribution endpoint
 llama-stack-client models unregister
 ```
 
-## Vector DB Management
-Manage vector databases.
-
-### `llama-stack-client vector_dbs list`
-Show available vector dbs on distribution endpoint
-```bash
-llama-stack-client vector_dbs list
-```
-```
-┏━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
-┃ identifier               ┃ provider_id ┃ provider_resource_id     ┃ vector_db_type ┃ params                            ┃
-┡━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩
-│ my_demo_vector_db        │ faiss       │ my_demo_vector_db        │                │ embedding_dimension: 768          │
-│                          │             │                          │                │ embedding_model: nomic-embed-text-v1.5 │
-│                          │             │                          │                │ type: vector_db                   │
-│                          │             │                          │                │                                   │
-└──────────────────────────┴─────────────┴──────────────────────────┴────────────────┴───────────────────────────────────┘
-```
-
-### `llama-stack-client vector_dbs register`
-Create a new vector db
-```bash
-llama-stack-client vector_dbs register [--provider-id ] [--provider-vector-db-id ] [--embedding-model ] [--embedding-dimension ]
-```
-
-Required arguments:
-- `VECTOR_DB_ID`: Vector DB ID
-
-Optional arguments:
-- `--provider-id`: Provider ID for the vector db
-- `--provider-vector-db-id`: Provider's vector db ID
-- `--embedding-model`: Embedding model to use. Default: `nomic-embed-text-v1.5`
-- `--embedding-dimension`: Dimension of embeddings. Default: 768
-
-### `llama-stack-client vector_dbs unregister`
-Delete a vector db
-```bash
-llama-stack-client vector_dbs unregister
-```
-
-Required arguments:
-- `VECTOR_DB_ID`: Vector DB ID
-
 ## Shield Management
 Manage safety shield services.
 ### `llama-stack-client shields list`
diff --git a/docs/openapi_generator/pyopenapi/operations.py b/docs/openapi_generator/pyopenapi/operations.py
index 2970d7e53..e5f33f13d 100644
--- a/docs/openapi_generator/pyopenapi/operations.py
+++ b/docs/openapi_generator/pyopenapi/operations.py
@@ -196,16 +196,10 @@ def _get_endpoint_functions(
 
 def _get_defining_class(member_fn: str, derived_cls: type) -> type:
     "Find the class in which a member function is first defined in a class inheritance hierarchy."
 
-    # This import must be dynamic here
-    from llama_stack.apis.tools import RAGToolRuntime, ToolRuntime
-
     # iterate in reverse member resolution order to find most specific class first
     for cls in reversed(inspect.getmro(derived_cls)):
         for name, _ in inspect.getmembers(cls, inspect.isfunction):
             if name == member_fn:
-                # HACK ALERT
-                if cls == RAGToolRuntime:
-                    return ToolRuntime
                 return cls
 
     raise ValidationError(
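With the RAGToolRuntime special case removed, `_get_defining_class` is a plain MRO walk: scanning the method resolution order from the most basal class outward, the first class that defines the method is the ancestor that introduced it. A small self-contained sketch of the same logic (class names here are illustrative, not from the generator):

```python
import inspect

class ToolRuntime:
    def invoke_tool(self): ...

class FancyRuntime(ToolRuntime):
    def extra(self): ...

def get_defining_class(member_fn: str, derived_cls: type) -> type:
    # reversed(getmro(...)) yields object first and the derived class last,
    # so the first class that defines the name is the one that introduced it.
    for cls in reversed(inspect.getmro(derived_cls)):
        for name, _ in inspect.getmembers(cls, inspect.isfunction):
            if name == member_fn:
                return cls
    raise ValueError(f"{member_fn!r} not found on {derived_cls!r}")

print(get_defining_class("invoke_tool", FancyRuntime))  # -> ToolRuntime
```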
diff --git a/docs/openapi_generator/run_openapi_generator.sh b/docs/openapi_generator/run_openapi_generator.sh
index 45d00d6e7..6cffd42b0 100755
--- a/docs/openapi_generator/run_openapi_generator.sh
+++ b/docs/openapi_generator/run_openapi_generator.sh
@@ -30,3 +30,5 @@ fi
 stack_dir=$(dirname $(dirname $THIS_DIR))
 PYTHONPATH=$PYTHONPATH:$stack_dir \
   python -m docs.openapi_generator.generate $(dirname $THIS_DIR)/static
+
+cp $stack_dir/docs/static/stainless-llama-stack-spec.yaml $stack_dir/client-sdks/stainless/openapi.yml
diff --git a/docs/static/deprecated-llama-stack-spec.html b/docs/static/deprecated-llama-stack-spec.html
index 60a8b9fbd..d920317cf 100644
--- a/docs/static/deprecated-llama-stack-spec.html
+++ b/docs/static/deprecated-llama-stack-spec.html
@@ -5547,7 +5547,7 @@
                 "enum": [
                     "model",
                     "shield",
-                    "vector_db",
+                    "vector_store",
                     "dataset",
                     "scoring_function",
                     "benchmark",
@@ -5798,7 +5798,7 @@
                 "enum": [
                     "model",
                     "shield",
-                    "vector_db",
+                    "vector_store",
                     "dataset",
                     "scoring_function",
                     "benchmark",
@@ -9024,6 +9024,10 @@
                     "$ref": "#/components/schemas/OpenAIResponseUsage",
                     "description": "(Optional) Token usage information for the response"
                 },
+                "instructions": {
+                    "type": "string",
+                    "description": "(Optional) System message inserted into the model's context"
+                },
                 "input": {
                     "type": "array",
                     "items": {
@@ -9901,6 +9905,10 @@
                 "usage": {
                     "$ref": "#/components/schemas/OpenAIResponseUsage",
                     "description": "(Optional) Token usage information for the response"
+                },
+                "instructions": {
+                    "type": "string",
+                    "description": "(Optional) System message inserted into the model's context"
                 }
             },
             "additionalProperties": false,
diff --git a/docs/static/deprecated-llama-stack-spec.yaml b/docs/static/deprecated-llama-stack-spec.yaml
index aaa6cd413..66b2caeca 100644
--- a/docs/static/deprecated-llama-stack-spec.yaml
+++ b/docs/static/deprecated-llama-stack-spec.yaml
@@ -4114,7 +4114,7 @@ components:
       enum:
         - model
        - shield
-        - vector_db
+        - vector_store
        - dataset
        - scoring_function
        - benchmark
@@ -4303,7 +4303,7 @@ components:
      enum:
        - model
        - shield
-        - vector_db
+        - vector_store
        - dataset
        - scoring_function
        - benchmark
@@ -6734,6 +6734,10 @@ components:
           $ref: '#/components/schemas/OpenAIResponseUsage'
           description: >-
             (Optional) Token usage information for the response
+        instructions:
+          type: string
+          description: >-
+            (Optional) System message inserted into the model's context
         input:
           type: array
           items:
@@ -7403,6 +7407,10 @@ components:
           $ref: '#/components/schemas/OpenAIResponseUsage'
           description: >-
             (Optional) Token usage information for the response
+        instructions:
+          type: string
+          description: >-
+            (Optional) System message inserted into the model's context
       additionalProperties: false
       required:
         - created_at
diff --git a/docs/static/experimental-llama-stack-spec.html b/docs/static/experimental-llama-stack-spec.html
index 7d572f89f..ab474180e 100644
--- a/docs/static/experimental-llama-stack-spec.html
+++ b/docs/static/experimental-llama-stack-spec.html
@@ -1850,7 +1850,7 @@
                 "enum": [
                     "model",
                     "shield",
-                    "vector_db",
+                    "vector_store",
                     "dataset",
                     "scoring_function",
                     "benchmark",
@@ -3983,7 +3983,7 @@
                 "enum": [
                     "model",
                     "shield",
-                    "vector_db",
+                    "vector_store",
                     "dataset",
                     "scoring_function",
                     "benchmark",
diff --git a/docs/static/experimental-llama-stack-spec.yaml b/docs/static/experimental-llama-stack-spec.yaml
index fee20814c..dd9e43cc5 100644
--- a/docs/static/experimental-llama-stack-spec.yaml
+++ b/docs/static/experimental-llama-stack-spec.yaml
@@ -1320,7 +1320,7 @@ components:
       enum:
        - model
        - shield
-        - vector_db
+        - vector_store
        - dataset
        - scoring_function
        - benchmark
@@ -2927,7 +2927,7 @@ components:
      enum:
        - model
        - shield
-        - vector_db
+        - vector_store
        - dataset
        - scoring_function
        - benchmark
diff --git a/docs/static/llama-stack-spec.html b/docs/static/llama-stack-spec.html
index 413e4f23e..7dfb2ed13 100644
--- a/docs/static/llama-stack-spec.html
+++ b/docs/static/llama-stack-spec.html
@@ -2624,89 +2624,6 @@
             "deprecated": false
         }
     },
-    "/v1/tool-runtime/rag-tool/insert": {
-        "post": {
-            "responses": {
-                "200": {
-                    "description": "OK"
-                },
-                "400": {
-                    "$ref": "#/components/responses/BadRequest400"
-                },
-                "429": {
-                    "$ref": "#/components/responses/TooManyRequests429"
-                },
-                "500": {
-                    "$ref": "#/components/responses/InternalServerError500"
-                },
-                "default": {
-                    "$ref": "#/components/responses/DefaultError"
-                }
-            },
-            "tags": [
-                "ToolRuntime"
-            ],
-            "summary": "Index documents so they can be used by the RAG system.",
-            "description": "Index documents so they can be used by the RAG system.",
-            "parameters": [],
-            "requestBody": {
-                "content": {
-                    "application/json": {
-                        "schema": {
-                            "$ref": "#/components/schemas/InsertRequest"
-                        }
-                    }
-                },
-                "required": true
-            },
-            "deprecated": false
-        }
-    },
-    "/v1/tool-runtime/rag-tool/query": {
-        "post": {
-            "responses": {
-                "200": {
-                    "description": "RAGQueryResult containing the retrieved content and metadata",
-                    "content": {
-                        "application/json": {
-                            "schema": {
-                                "$ref": "#/components/schemas/RAGQueryResult"
-                            }
-                        }
-                    }
-                },
-                "400": {
-                    "$ref": "#/components/responses/BadRequest400"
-                },
-                "429": {
-                    "$ref": "#/components/responses/TooManyRequests429"
-                },
-                "500": {
-                    "$ref": "#/components/responses/InternalServerError500"
-                },
-                "default": {
-                    "$ref": "#/components/responses/DefaultError"
-                }
-            },
-            "tags": [
-                "ToolRuntime"
-            ],
-            "summary": "Query the RAG system for context; typically invoked by the agent.",
-            "description": "Query the RAG system for context; typically invoked by the agent.",
-            "parameters": [],
-            "requestBody": {
-                "content": {
-                    "application/json": {
-                        "schema": {
-                            "$ref": "#/components/schemas/QueryRequest"
-                        }
-                    }
-                },
-                "required": true
-            },
-            "deprecated": false
-        }
-    },
     "/v1/toolgroups": {
         "get": {
             "responses": {
@@ -6800,7 +6717,7 @@
                 "enum": [
                     "model",
                     "shield",
-                    "vector_db",
+                    "vector_store",
                     "dataset",
                     "scoring_function",
                     "benchmark",
@@ -7600,6 +7517,10 @@
                     "$ref": "#/components/schemas/OpenAIResponseUsage",
                     "description": "(Optional) Token usage information for the response"
                 },
+                "instructions": {
+                    "type": "string",
+                    "description": "(Optional) System message inserted into the model's context"
+                },
                 "input": {
                     "type": "array",
                     "items": {
@@ -8148,6 +8069,10 @@
                 "usage": {
                     "$ref": "#/components/schemas/OpenAIResponseUsage",
                     "description": "(Optional) Token usage information for the response"
+                },
+                "instructions": {
+                    "type": "string",
+                    "description": "(Optional) System message inserted into the model's context"
                 }
             },
             "additionalProperties": false,
@@ -10197,7 +10122,7 @@
                 "enum": [
                     "model",
                     "shield",
-                    "vector_db",
+                    "vector_store",
                     "dataset",
                     "scoring_function",
                     "benchmark",
@@ -10679,7 +10604,7 @@
                 "enum": [
                     "model",
                     "shield",
-                    "vector_db",
+                    "vector_store",
                     "dataset",
                     "scoring_function",
                     "benchmark",
@@ -11375,346 +11300,6 @@
             "title": "ListToolDefsResponse",
             "description": "Response containing a list of tool definitions."
         },
-        "RAGDocument": {
-            "type": "object",
-            "properties": {
-                "document_id": {
-                    "type": "string",
-                    "description": "The unique identifier for the document."
-                },
-                "content": {
-                    "oneOf": [
-                        {
-                            "type": "string"
-                        },
-                        {
-                            "$ref": "#/components/schemas/InterleavedContentItem"
-                        },
-                        {
-                            "type": "array",
-                            "items": {
-                                "$ref": "#/components/schemas/InterleavedContentItem"
-                            }
-                        },
-                        {
-                            "$ref": "#/components/schemas/URL"
-                        }
-                    ],
-                    "description": "The content of the document."
-                },
-                "mime_type": {
-                    "type": "string",
-                    "description": "The MIME type of the document."
-                },
-                "metadata": {
-                    "type": "object",
-                    "additionalProperties": {
-                        "oneOf": [
-                            {
-                                "type": "null"
-                            },
-                            {
-                                "type": "boolean"
-                            },
-                            {
-                                "type": "number"
-                            },
-                            {
-                                "type": "string"
-                            },
-                            {
-                                "type": "array"
-                            },
-                            {
-                                "type": "object"
-                            }
-                        ]
-                    },
-                    "description": "Additional metadata for the document."
-                }
-            },
-            "additionalProperties": false,
-            "required": [
-                "document_id",
-                "content",
-                "metadata"
-            ],
-            "title": "RAGDocument",
-            "description": "A document to be used for document ingestion in the RAG Tool."
-        },
-        "InsertRequest": {
-            "type": "object",
-            "properties": {
-                "documents": {
-                    "type": "array",
-                    "items": {
-                        "$ref": "#/components/schemas/RAGDocument"
-                    },
-                    "description": "List of documents to index in the RAG system"
-                },
-                "vector_db_id": {
-                    "type": "string",
-                    "description": "ID of the vector database to store the document embeddings"
-                },
-                "chunk_size_in_tokens": {
-                    "type": "integer",
-                    "description": "(Optional) Size in tokens for document chunking during indexing"
-                }
-            },
-            "additionalProperties": false,
-            "required": [
-                "documents",
-                "vector_db_id",
-                "chunk_size_in_tokens"
-            ],
-            "title": "InsertRequest"
-        },
-        "DefaultRAGQueryGeneratorConfig": {
-            "type": "object",
-            "properties": {
-                "type": {
-                    "type": "string",
-                    "const": "default",
-                    "default": "default",
-                    "description": "Type of query generator, always 'default'"
-                },
-                "separator": {
-                    "type": "string",
-                    "default": " ",
-                    "description": "String separator used to join query terms"
-                }
-            },
-            "additionalProperties": false,
-            "required": [
-                "type",
-                "separator"
-            ],
-            "title": "DefaultRAGQueryGeneratorConfig",
-            "description": "Configuration for the default RAG query generator."
-        },
-        "LLMRAGQueryGeneratorConfig": {
-            "type": "object",
-            "properties": {
-                "type": {
-                    "type": "string",
-                    "const": "llm",
-                    "default": "llm",
-                    "description": "Type of query generator, always 'llm'"
-                },
-                "model": {
-                    "type": "string",
-                    "description": "Name of the language model to use for query generation"
-                },
-                "template": {
-                    "type": "string",
-                    "description": "Template string for formatting the query generation prompt"
-                }
-            },
-            "additionalProperties": false,
-            "required": [
-                "type",
-                "model",
-                "template"
-            ],
-            "title": "LLMRAGQueryGeneratorConfig",
-            "description": "Configuration for the LLM-based RAG query generator."
-        },
-        "RAGQueryConfig": {
-            "type": "object",
-            "properties": {
-                "query_generator_config": {
-                    "oneOf": [
-                        {
-                            "$ref": "#/components/schemas/DefaultRAGQueryGeneratorConfig"
-                        },
-                        {
-                            "$ref": "#/components/schemas/LLMRAGQueryGeneratorConfig"
-                        }
-                    ],
-                    "discriminator": {
-                        "propertyName": "type",
-                        "mapping": {
-                            "default": "#/components/schemas/DefaultRAGQueryGeneratorConfig",
-                            "llm": "#/components/schemas/LLMRAGQueryGeneratorConfig"
-                        }
-                    },
-                    "description": "Configuration for the query generator."
-                },
-                "max_tokens_in_context": {
-                    "type": "integer",
-                    "default": 4096,
-                    "description": "Maximum number of tokens in the context."
-                },
-                "max_chunks": {
-                    "type": "integer",
-                    "default": 5,
-                    "description": "Maximum number of chunks to retrieve."
-                },
-                "chunk_template": {
-                    "type": "string",
-                    "default": "Result {index}\nContent: {chunk.content}\nMetadata: {metadata}\n",
-                    "description": "Template for formatting each retrieved chunk in the context. Available placeholders: {index} (1-based chunk ordinal), {chunk.content} (chunk content string), {metadata} (chunk metadata dict). Default: \"Result {index}\\nContent: {chunk.content}\\nMetadata: {metadata}\\n\""
-                },
-                "mode": {
-                    "$ref": "#/components/schemas/RAGSearchMode",
-                    "default": "vector",
-                    "description": "Search mode for retrieval—either \"vector\", \"keyword\", or \"hybrid\". Default \"vector\"."
-                },
-                "ranker": {
-                    "$ref": "#/components/schemas/Ranker",
-                    "description": "Configuration for the ranker to use in hybrid search. Defaults to RRF ranker."
-                }
-            },
-            "additionalProperties": false,
-            "required": [
-                "query_generator_config",
-                "max_tokens_in_context",
-                "max_chunks",
-                "chunk_template"
-            ],
-            "title": "RAGQueryConfig",
-            "description": "Configuration for the RAG query generation."
-        },
-        "RAGSearchMode": {
-            "type": "string",
-            "enum": [
-                "vector",
-                "keyword",
-                "hybrid"
-            ],
-            "title": "RAGSearchMode",
-            "description": "Search modes for RAG query retrieval: - VECTOR: Uses vector similarity search for semantic matching - KEYWORD: Uses keyword-based search for exact matching - HYBRID: Combines both vector and keyword search for better results"
-        },
-        "RRFRanker": {
-            "type": "object",
-            "properties": {
-                "type": {
-                    "type": "string",
-                    "const": "rrf",
-                    "default": "rrf",
-                    "description": "The type of ranker, always \"rrf\""
-                },
-                "impact_factor": {
-                    "type": "number",
-                    "default": 60.0,
-                    "description": "The impact factor for RRF scoring. Higher values give more weight to higher-ranked results. Must be greater than 0"
-                }
-            },
-            "additionalProperties": false,
-            "required": [
-                "type",
-                "impact_factor"
-            ],
-            "title": "RRFRanker",
-            "description": "Reciprocal Rank Fusion (RRF) ranker configuration."
-        },
-        "Ranker": {
-            "oneOf": [
-                {
-                    "$ref": "#/components/schemas/RRFRanker"
-                },
-                {
-                    "$ref": "#/components/schemas/WeightedRanker"
-                }
-            ],
-            "discriminator": {
-                "propertyName": "type",
-                "mapping": {
-                    "rrf": "#/components/schemas/RRFRanker",
-                    "weighted": "#/components/schemas/WeightedRanker"
-                }
-            }
-        },
-        "WeightedRanker": {
-            "type": "object",
-            "properties": {
-                "type": {
-                    "type": "string",
-                    "const": "weighted",
-                    "default": "weighted",
-                    "description": "The type of ranker, always \"weighted\""
-                },
-                "alpha": {
-                    "type": "number",
-                    "default": 0.5,
-                    "description": "Weight factor between 0 and 1. 0 means only use keyword scores, 1 means only use vector scores, values in between blend both scores."
-                }
-            },
-            "additionalProperties": false,
-            "required": [
-                "type",
-                "alpha"
-            ],
-            "title": "WeightedRanker",
-            "description": "Weighted ranker configuration that combines vector and keyword scores."
-        },
-        "QueryRequest": {
-            "type": "object",
-            "properties": {
-                "content": {
-                    "$ref": "#/components/schemas/InterleavedContent",
-                    "description": "The query content to search for in the indexed documents"
-                },
-                "vector_db_ids": {
-                    "type": "array",
-                    "items": {
-                        "type": "string"
-                    },
-                    "description": "List of vector database IDs to search within"
-                },
-                "query_config": {
-                    "$ref": "#/components/schemas/RAGQueryConfig",
-                    "description": "(Optional) Configuration parameters for the query operation"
-                }
-            },
-            "additionalProperties": false,
-            "required": [
-                "content",
-                "vector_db_ids"
-            ],
-            "title": "QueryRequest"
-        },
-        "RAGQueryResult": {
-            "type": "object",
-            "properties": {
-                "content": {
-                    "$ref": "#/components/schemas/InterleavedContent",
-                    "description": "(Optional) The retrieved content from the query"
-                },
-                "metadata": {
-                    "type": "object",
-                    "additionalProperties": {
-                        "oneOf": [
-                            {
-                                "type": "null"
-                            },
-                            {
-                                "type": "boolean"
-                            },
-                            {
-                                "type": "number"
-                            },
-                            {
-                                "type": "string"
-                            },
-                            {
-                                "type": "array"
-                            },
-                            {
-                                "type": "object"
-                            }
-                        ]
-                    },
-                    "description": "Additional metadata about the query result"
-                }
-            },
-            "additionalProperties": false,
-            "required": [
-                "metadata"
-            ],
-            "title": "RAGQueryResult",
-            "description": "Result of a RAG query containing retrieved content and metadata."
-        },
         "ToolGroup": {
             "type": "object",
             "properties": {
@@ -11732,7 +11317,7 @@
                 "enum": [
                     "model",
                     "shield",
-                    "vector_db",
+                    "vector_store",
                     "dataset",
                     "scoring_function",
                     "benchmark",
- parameters: [] - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/QueryRequest' - required: true - deprecated: false /v1/toolgroups: get: responses: @@ -5227,7 +5164,7 @@ components: enum: - model - shield - - vector_db + - vector_store - dataset - scoring_function - benchmark @@ -5815,6 +5752,10 @@ components: $ref: '#/components/schemas/OpenAIResponseUsage' description: >- (Optional) Token usage information for the response + instructions: + type: string + description: >- + (Optional) System message inserted into the model's context input: type: array items: @@ -6218,6 +6159,10 @@ components: $ref: '#/components/schemas/OpenAIResponseUsage' description: >- (Optional) Token usage information for the response + instructions: + type: string + description: >- + (Optional) System message inserted into the model's context additionalProperties: false required: - created_at @@ -7911,7 +7856,7 @@ components: enum: - model - shield - - vector_db + - vector_store - dataset - scoring_function - benchmark @@ -8219,7 +8164,7 @@ components: enum: - model - shield - - vector_db + - vector_store - dataset - scoring_function - benchmark @@ -8700,274 +8645,6 @@ components: title: ListToolDefsResponse description: >- Response containing a list of tool definitions. - RAGDocument: - type: object - properties: - document_id: - type: string - description: The unique identifier for the document. - content: - oneOf: - - type: string - - $ref: '#/components/schemas/InterleavedContentItem' - - type: array - items: - $ref: '#/components/schemas/InterleavedContentItem' - - $ref: '#/components/schemas/URL' - description: The content of the document. - mime_type: - type: string - description: The MIME type of the document. - metadata: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: Additional metadata for the document. - additionalProperties: false - required: - - document_id - - content - - metadata - title: RAGDocument - description: >- - A document to be used for document ingestion in the RAG Tool. - InsertRequest: - type: object - properties: - documents: - type: array - items: - $ref: '#/components/schemas/RAGDocument' - description: >- - List of documents to index in the RAG system - vector_db_id: - type: string - description: >- - ID of the vector database to store the document embeddings - chunk_size_in_tokens: - type: integer - description: >- - (Optional) Size in tokens for document chunking during indexing - additionalProperties: false - required: - - documents - - vector_db_id - - chunk_size_in_tokens - title: InsertRequest - DefaultRAGQueryGeneratorConfig: - type: object - properties: - type: - type: string - const: default - default: default - description: >- - Type of query generator, always 'default' - separator: - type: string - default: ' ' - description: >- - String separator used to join query terms - additionalProperties: false - required: - - type - - separator - title: DefaultRAGQueryGeneratorConfig - description: >- - Configuration for the default RAG query generator. 
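The deleted DefaultRAGQueryGeneratorConfig above carried exactly two fields: a constant type of 'default' and a separator string. In other words, the default query generator did no rewriting at all; it simply joined the query terms with the separator. A minimal sketch of that behavior, assuming a hypothetical helper name and plain-string inputs (the removed implementation lived inside the RAG tool runtime):

    def generate_default_query(terms: list[str], separator: str = " ") -> str:
        # Join the query terms with the configured separator, mirroring the
        # deleted DefaultRAGQueryGeneratorConfig (type='default', separator=' ').
        return separator.join(t.strip() for t in terms if t.strip())

    # Example: generate_default_query(["how do I", "configure storage?"])
    # returns "how do I configure storage?"
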
- LLMRAGQueryGeneratorConfig: - type: object - properties: - type: - type: string - const: llm - default: llm - description: Type of query generator, always 'llm' - model: - type: string - description: >- - Name of the language model to use for query generation - template: - type: string - description: >- - Template string for formatting the query generation prompt - additionalProperties: false - required: - - type - - model - - template - title: LLMRAGQueryGeneratorConfig - description: >- - Configuration for the LLM-based RAG query generator. - RAGQueryConfig: - type: object - properties: - query_generator_config: - oneOf: - - $ref: '#/components/schemas/DefaultRAGQueryGeneratorConfig' - - $ref: '#/components/schemas/LLMRAGQueryGeneratorConfig' - discriminator: - propertyName: type - mapping: - default: '#/components/schemas/DefaultRAGQueryGeneratorConfig' - llm: '#/components/schemas/LLMRAGQueryGeneratorConfig' - description: Configuration for the query generator. - max_tokens_in_context: - type: integer - default: 4096 - description: Maximum number of tokens in the context. - max_chunks: - type: integer - default: 5 - description: Maximum number of chunks to retrieve. - chunk_template: - type: string - default: > - Result {index} - - Content: {chunk.content} - - Metadata: {metadata} - description: >- - Template for formatting each retrieved chunk in the context. Available - placeholders: {index} (1-based chunk ordinal), {chunk.content} (chunk - content string), {metadata} (chunk metadata dict). Default: "Result {index}\nContent: - {chunk.content}\nMetadata: {metadata}\n" - mode: - $ref: '#/components/schemas/RAGSearchMode' - default: vector - description: >- - Search mode for retrieval—either "vector", "keyword", or "hybrid". Default - "vector". - ranker: - $ref: '#/components/schemas/Ranker' - description: >- - Configuration for the ranker to use in hybrid search. Defaults to RRF - ranker. - additionalProperties: false - required: - - query_generator_config - - max_tokens_in_context - - max_chunks - - chunk_template - title: RAGQueryConfig - description: >- - Configuration for the RAG query generation. - RAGSearchMode: - type: string - enum: - - vector - - keyword - - hybrid - title: RAGSearchMode - description: >- - Search modes for RAG query retrieval: - VECTOR: Uses vector similarity search - for semantic matching - KEYWORD: Uses keyword-based search for exact matching - - HYBRID: Combines both vector and keyword search for better results - RRFRanker: - type: object - properties: - type: - type: string - const: rrf - default: rrf - description: The type of ranker, always "rrf" - impact_factor: - type: number - default: 60.0 - description: >- - The impact factor for RRF scoring. Higher values give more weight to higher-ranked - results. Must be greater than 0 - additionalProperties: false - required: - - type - - impact_factor - title: RRFRanker - description: >- - Reciprocal Rank Fusion (RRF) ranker configuration. - Ranker: - oneOf: - - $ref: '#/components/schemas/RRFRanker' - - $ref: '#/components/schemas/WeightedRanker' - discriminator: - propertyName: type - mapping: - rrf: '#/components/schemas/RRFRanker' - weighted: '#/components/schemas/WeightedRanker' - WeightedRanker: - type: object - properties: - type: - type: string - const: weighted - default: weighted - description: The type of ranker, always "weighted" - alpha: - type: number - default: 0.5 - description: >- - Weight factor between 0 and 1. 
0 means only use keyword scores, 1 means - only use vector scores, values in between blend both scores. - additionalProperties: false - required: - - type - - alpha - title: WeightedRanker - description: >- - Weighted ranker configuration that combines vector and keyword scores. - QueryRequest: - type: object - properties: - content: - $ref: '#/components/schemas/InterleavedContent' - description: >- - The query content to search for in the indexed documents - vector_db_ids: - type: array - items: - type: string - description: >- - List of vector database IDs to search within - query_config: - $ref: '#/components/schemas/RAGQueryConfig' - description: >- - (Optional) Configuration parameters for the query operation - additionalProperties: false - required: - - content - - vector_db_ids - title: QueryRequest - RAGQueryResult: - type: object - properties: - content: - $ref: '#/components/schemas/InterleavedContent' - description: >- - (Optional) The retrieved content from the query - metadata: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - Additional metadata about the query result - additionalProperties: false - required: - - metadata - title: RAGQueryResult - description: >- - Result of a RAG query containing retrieved content and metadata. ToolGroup: type: object properties: @@ -8982,7 +8659,7 @@ components: enum: - model - shield - - vector_db + - vector_store - dataset - scoring_function - benchmark diff --git a/docs/static/stainless-llama-stack-spec.html b/docs/static/stainless-llama-stack-spec.html index 858f20725..7930b28e6 100644 --- a/docs/static/stainless-llama-stack-spec.html +++ b/docs/static/stainless-llama-stack-spec.html @@ -2624,89 +2624,6 @@ "deprecated": false } }, - "/v1/tool-runtime/rag-tool/insert": { - "post": { - "responses": { - "200": { - "description": "OK" - }, - "400": { - "$ref": "#/components/responses/BadRequest400" - }, - "429": { - "$ref": "#/components/responses/TooManyRequests429" - }, - "500": { - "$ref": "#/components/responses/InternalServerError500" - }, - "default": { - "$ref": "#/components/responses/DefaultError" - } - }, - "tags": [ - "ToolRuntime" - ], - "summary": "Index documents so they can be used by the RAG system.", - "description": "Index documents so they can be used by the RAG system.", - "parameters": [], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/InsertRequest" - } - } - }, - "required": true - }, - "deprecated": false - } - }, - "/v1/tool-runtime/rag-tool/query": { - "post": { - "responses": { - "200": { - "description": "RAGQueryResult containing the retrieved content and metadata", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/RAGQueryResult" - } - } - } - }, - "400": { - "$ref": "#/components/responses/BadRequest400" - }, - "429": { - "$ref": "#/components/responses/TooManyRequests429" - }, - "500": { - "$ref": "#/components/responses/InternalServerError500" - }, - "default": { - "$ref": "#/components/responses/DefaultError" - } - }, - "tags": [ - "ToolRuntime" - ], - "summary": "Query the RAG system for context; typically invoked by the agent.", - "description": "Query the RAG system for context; typically invoked by the agent.", - "parameters": [], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/QueryRequest" - } - } - }, - "required": true - }, - 
"deprecated": false - } - }, "/v1/toolgroups": { "get": { "responses": { @@ -8472,7 +8389,7 @@ "enum": [ "model", "shield", - "vector_db", + "vector_store", "dataset", "scoring_function", "benchmark", @@ -9272,6 +9189,10 @@ "$ref": "#/components/schemas/OpenAIResponseUsage", "description": "(Optional) Token usage information for the response" }, + "instructions": { + "type": "string", + "description": "(Optional) System message inserted into the model's context" + }, "input": { "type": "array", "items": { @@ -9820,6 +9741,10 @@ "usage": { "$ref": "#/components/schemas/OpenAIResponseUsage", "description": "(Optional) Token usage information for the response" + }, + "instructions": { + "type": "string", + "description": "(Optional) System message inserted into the model's context" } }, "additionalProperties": false, @@ -11869,7 +11794,7 @@ "enum": [ "model", "shield", - "vector_db", + "vector_store", "dataset", "scoring_function", "benchmark", @@ -12351,7 +12276,7 @@ "enum": [ "model", "shield", - "vector_db", + "vector_store", "dataset", "scoring_function", "benchmark", @@ -13047,346 +12972,6 @@ "title": "ListToolDefsResponse", "description": "Response containing a list of tool definitions." }, - "RAGDocument": { - "type": "object", - "properties": { - "document_id": { - "type": "string", - "description": "The unique identifier for the document." - }, - "content": { - "oneOf": [ - { - "type": "string" - }, - { - "$ref": "#/components/schemas/InterleavedContentItem" - }, - { - "type": "array", - "items": { - "$ref": "#/components/schemas/InterleavedContentItem" - } - }, - { - "$ref": "#/components/schemas/URL" - } - ], - "description": "The content of the document." - }, - "mime_type": { - "type": "string", - "description": "The MIME type of the document." - }, - "metadata": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - }, - "description": "Additional metadata for the document." - } - }, - "additionalProperties": false, - "required": [ - "document_id", - "content", - "metadata" - ], - "title": "RAGDocument", - "description": "A document to be used for document ingestion in the RAG Tool." - }, - "InsertRequest": { - "type": "object", - "properties": { - "documents": { - "type": "array", - "items": { - "$ref": "#/components/schemas/RAGDocument" - }, - "description": "List of documents to index in the RAG system" - }, - "vector_db_id": { - "type": "string", - "description": "ID of the vector database to store the document embeddings" - }, - "chunk_size_in_tokens": { - "type": "integer", - "description": "(Optional) Size in tokens for document chunking during indexing" - } - }, - "additionalProperties": false, - "required": [ - "documents", - "vector_db_id", - "chunk_size_in_tokens" - ], - "title": "InsertRequest" - }, - "DefaultRAGQueryGeneratorConfig": { - "type": "object", - "properties": { - "type": { - "type": "string", - "const": "default", - "default": "default", - "description": "Type of query generator, always 'default'" - }, - "separator": { - "type": "string", - "default": " ", - "description": "String separator used to join query terms" - } - }, - "additionalProperties": false, - "required": [ - "type", - "separator" - ], - "title": "DefaultRAGQueryGeneratorConfig", - "description": "Configuration for the default RAG query generator." 
- }, - "LLMRAGQueryGeneratorConfig": { - "type": "object", - "properties": { - "type": { - "type": "string", - "const": "llm", - "default": "llm", - "description": "Type of query generator, always 'llm'" - }, - "model": { - "type": "string", - "description": "Name of the language model to use for query generation" - }, - "template": { - "type": "string", - "description": "Template string for formatting the query generation prompt" - } - }, - "additionalProperties": false, - "required": [ - "type", - "model", - "template" - ], - "title": "LLMRAGQueryGeneratorConfig", - "description": "Configuration for the LLM-based RAG query generator." - }, - "RAGQueryConfig": { - "type": "object", - "properties": { - "query_generator_config": { - "oneOf": [ - { - "$ref": "#/components/schemas/DefaultRAGQueryGeneratorConfig" - }, - { - "$ref": "#/components/schemas/LLMRAGQueryGeneratorConfig" - } - ], - "discriminator": { - "propertyName": "type", - "mapping": { - "default": "#/components/schemas/DefaultRAGQueryGeneratorConfig", - "llm": "#/components/schemas/LLMRAGQueryGeneratorConfig" - } - }, - "description": "Configuration for the query generator." - }, - "max_tokens_in_context": { - "type": "integer", - "default": 4096, - "description": "Maximum number of tokens in the context." - }, - "max_chunks": { - "type": "integer", - "default": 5, - "description": "Maximum number of chunks to retrieve." - }, - "chunk_template": { - "type": "string", - "default": "Result {index}\nContent: {chunk.content}\nMetadata: {metadata}\n", - "description": "Template for formatting each retrieved chunk in the context. Available placeholders: {index} (1-based chunk ordinal), {chunk.content} (chunk content string), {metadata} (chunk metadata dict). Default: \"Result {index}\\nContent: {chunk.content}\\nMetadata: {metadata}\\n\"" - }, - "mode": { - "$ref": "#/components/schemas/RAGSearchMode", - "default": "vector", - "description": "Search mode for retrieval—either \"vector\", \"keyword\", or \"hybrid\". Default \"vector\"." - }, - "ranker": { - "$ref": "#/components/schemas/Ranker", - "description": "Configuration for the ranker to use in hybrid search. Defaults to RRF ranker." - } - }, - "additionalProperties": false, - "required": [ - "query_generator_config", - "max_tokens_in_context", - "max_chunks", - "chunk_template" - ], - "title": "RAGQueryConfig", - "description": "Configuration for the RAG query generation." - }, - "RAGSearchMode": { - "type": "string", - "enum": [ - "vector", - "keyword", - "hybrid" - ], - "title": "RAGSearchMode", - "description": "Search modes for RAG query retrieval: - VECTOR: Uses vector similarity search for semantic matching - KEYWORD: Uses keyword-based search for exact matching - HYBRID: Combines both vector and keyword search for better results" - }, - "RRFRanker": { - "type": "object", - "properties": { - "type": { - "type": "string", - "const": "rrf", - "default": "rrf", - "description": "The type of ranker, always \"rrf\"" - }, - "impact_factor": { - "type": "number", - "default": 60.0, - "description": "The impact factor for RRF scoring. Higher values give more weight to higher-ranked results. Must be greater than 0" - } - }, - "additionalProperties": false, - "required": [ - "type", - "impact_factor" - ], - "title": "RRFRanker", - "description": "Reciprocal Rank Fusion (RRF) ranker configuration." 
- }, - "Ranker": { - "oneOf": [ - { - "$ref": "#/components/schemas/RRFRanker" - }, - { - "$ref": "#/components/schemas/WeightedRanker" - } - ], - "discriminator": { - "propertyName": "type", - "mapping": { - "rrf": "#/components/schemas/RRFRanker", - "weighted": "#/components/schemas/WeightedRanker" - } - } - }, - "WeightedRanker": { - "type": "object", - "properties": { - "type": { - "type": "string", - "const": "weighted", - "default": "weighted", - "description": "The type of ranker, always \"weighted\"" - }, - "alpha": { - "type": "number", - "default": 0.5, - "description": "Weight factor between 0 and 1. 0 means only use keyword scores, 1 means only use vector scores, values in between blend both scores." - } - }, - "additionalProperties": false, - "required": [ - "type", - "alpha" - ], - "title": "WeightedRanker", - "description": "Weighted ranker configuration that combines vector and keyword scores." - }, - "QueryRequest": { - "type": "object", - "properties": { - "content": { - "$ref": "#/components/schemas/InterleavedContent", - "description": "The query content to search for in the indexed documents" - }, - "vector_db_ids": { - "type": "array", - "items": { - "type": "string" - }, - "description": "List of vector database IDs to search within" - }, - "query_config": { - "$ref": "#/components/schemas/RAGQueryConfig", - "description": "(Optional) Configuration parameters for the query operation" - } - }, - "additionalProperties": false, - "required": [ - "content", - "vector_db_ids" - ], - "title": "QueryRequest" - }, - "RAGQueryResult": { - "type": "object", - "properties": { - "content": { - "$ref": "#/components/schemas/InterleavedContent", - "description": "(Optional) The retrieved content from the query" - }, - "metadata": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - }, - "description": "Additional metadata about the query result" - } - }, - "additionalProperties": false, - "required": [ - "metadata" - ], - "title": "RAGQueryResult", - "description": "Result of a RAG query containing retrieved content and metadata." - }, "ToolGroup": { "type": "object", "properties": { @@ -13404,7 +12989,7 @@ "enum": [ "model", "shield", - "vector_db", + "vector_store", "dataset", "scoring_function", "benchmark", @@ -14951,7 +14536,7 @@ "enum": [ "model", "shield", - "vector_db", + "vector_store", "dataset", "scoring_function", "benchmark", @@ -16696,7 +16281,7 @@ "enum": [ "model", "shield", - "vector_db", + "vector_store", "dataset", "scoring_function", "benchmark", diff --git a/docs/static/stainless-llama-stack-spec.yaml b/docs/static/stainless-llama-stack-spec.yaml index 886549dbc..98a309f12 100644 --- a/docs/static/stainless-llama-stack-spec.yaml +++ b/docs/static/stainless-llama-stack-spec.yaml @@ -2039,69 +2039,6 @@ paths: schema: $ref: '#/components/schemas/URL' deprecated: false - /v1/tool-runtime/rag-tool/insert: - post: - responses: - '200': - description: OK - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - ToolRuntime - summary: >- - Index documents so they can be used by the RAG system. - description: >- - Index documents so they can be used by the RAG system. 
- parameters: [] - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/InsertRequest' - required: true - deprecated: false - /v1/tool-runtime/rag-tool/query: - post: - responses: - '200': - description: >- - RAGQueryResult containing the retrieved content and metadata - content: - application/json: - schema: - $ref: '#/components/schemas/RAGQueryResult' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - ToolRuntime - summary: >- - Query the RAG system for context; typically invoked by the agent. - description: >- - Query the RAG system for context; typically invoked by the agent. - parameters: [] - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/QueryRequest' - required: true - deprecated: false /v1/toolgroups: get: responses: @@ -6440,7 +6377,7 @@ components: enum: - model - shield - - vector_db + - vector_store - dataset - scoring_function - benchmark @@ -7028,6 +6965,10 @@ components: $ref: '#/components/schemas/OpenAIResponseUsage' description: >- (Optional) Token usage information for the response + instructions: + type: string + description: >- + (Optional) System message inserted into the model's context input: type: array items: @@ -7431,6 +7372,10 @@ components: $ref: '#/components/schemas/OpenAIResponseUsage' description: >- (Optional) Token usage information for the response + instructions: + type: string + description: >- + (Optional) System message inserted into the model's context additionalProperties: false required: - created_at @@ -9124,7 +9069,7 @@ components: enum: - model - shield - - vector_db + - vector_store - dataset - scoring_function - benchmark @@ -9432,7 +9377,7 @@ components: enum: - model - shield - - vector_db + - vector_store - dataset - scoring_function - benchmark @@ -9913,274 +9858,6 @@ components: title: ListToolDefsResponse description: >- Response containing a list of tool definitions. - RAGDocument: - type: object - properties: - document_id: - type: string - description: The unique identifier for the document. - content: - oneOf: - - type: string - - $ref: '#/components/schemas/InterleavedContentItem' - - type: array - items: - $ref: '#/components/schemas/InterleavedContentItem' - - $ref: '#/components/schemas/URL' - description: The content of the document. - mime_type: - type: string - description: The MIME type of the document. - metadata: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: Additional metadata for the document. - additionalProperties: false - required: - - document_id - - content - - metadata - title: RAGDocument - description: >- - A document to be used for document ingestion in the RAG Tool. 
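For reference while these endpoints are being removed: clients used to POST an InsertRequest to /v1/tool-runtime/rag-tool/insert, with documents shaped like the RAGDocument schema above. A minimal sketch of such a request body, using the plain-string variant of content; every concrete value here is hypothetical:

    insert_request = {
        "documents": [
            {
                "document_id": "doc-001",                       # hypothetical id
                "content": "Llama Stack is a set of APIs ...",  # string variant of content
                "mime_type": "text/plain",
                "metadata": {"source": "example"},              # required, may be empty
            }
        ],
        "vector_db_id": "my-vector-db",                         # hypothetical store id
        "chunk_size_in_tokens": 512,
    }

After this change the rag-tool routes are gone; the vector_store naming adopted elsewhere in this diff suggests the OpenAI-compatible vector-store APIs as the intended ingestion path going forward.
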
- InsertRequest: - type: object - properties: - documents: - type: array - items: - $ref: '#/components/schemas/RAGDocument' - description: >- - List of documents to index in the RAG system - vector_db_id: - type: string - description: >- - ID of the vector database to store the document embeddings - chunk_size_in_tokens: - type: integer - description: >- - (Optional) Size in tokens for document chunking during indexing - additionalProperties: false - required: - - documents - - vector_db_id - - chunk_size_in_tokens - title: InsertRequest - DefaultRAGQueryGeneratorConfig: - type: object - properties: - type: - type: string - const: default - default: default - description: >- - Type of query generator, always 'default' - separator: - type: string - default: ' ' - description: >- - String separator used to join query terms - additionalProperties: false - required: - - type - - separator - title: DefaultRAGQueryGeneratorConfig - description: >- - Configuration for the default RAG query generator. - LLMRAGQueryGeneratorConfig: - type: object - properties: - type: - type: string - const: llm - default: llm - description: Type of query generator, always 'llm' - model: - type: string - description: >- - Name of the language model to use for query generation - template: - type: string - description: >- - Template string for formatting the query generation prompt - additionalProperties: false - required: - - type - - model - - template - title: LLMRAGQueryGeneratorConfig - description: >- - Configuration for the LLM-based RAG query generator. - RAGQueryConfig: - type: object - properties: - query_generator_config: - oneOf: - - $ref: '#/components/schemas/DefaultRAGQueryGeneratorConfig' - - $ref: '#/components/schemas/LLMRAGQueryGeneratorConfig' - discriminator: - propertyName: type - mapping: - default: '#/components/schemas/DefaultRAGQueryGeneratorConfig' - llm: '#/components/schemas/LLMRAGQueryGeneratorConfig' - description: Configuration for the query generator. - max_tokens_in_context: - type: integer - default: 4096 - description: Maximum number of tokens in the context. - max_chunks: - type: integer - default: 5 - description: Maximum number of chunks to retrieve. - chunk_template: - type: string - default: > - Result {index} - - Content: {chunk.content} - - Metadata: {metadata} - description: >- - Template for formatting each retrieved chunk in the context. Available - placeholders: {index} (1-based chunk ordinal), {chunk.content} (chunk - content string), {metadata} (chunk metadata dict). Default: "Result {index}\nContent: - {chunk.content}\nMetadata: {metadata}\n" - mode: - $ref: '#/components/schemas/RAGSearchMode' - default: vector - description: >- - Search mode for retrieval—either "vector", "keyword", or "hybrid". Default - "vector". - ranker: - $ref: '#/components/schemas/Ranker' - description: >- - Configuration for the ranker to use in hybrid search. Defaults to RRF - ranker. - additionalProperties: false - required: - - query_generator_config - - max_tokens_in_context - - max_chunks - - chunk_template - title: RAGQueryConfig - description: >- - Configuration for the RAG query generation. 
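Two behaviors the deleted RAGQueryConfig controlled are worth keeping in mind when porting callers: how each retrieved chunk was rendered into the model context (chunk_template), and how hybrid search combined keyword and vector scores (ranker). A minimal sketch of both, assuming a simple chunk object and hypothetical function names; the weighted formula follows the deleted WeightedRanker description, and the RRF form is the standard reciprocal-rank-fusion sum, an assumption about what the removed implementation computed from impact_factor:

    DEFAULT_TEMPLATE = "Result {index}\nContent: {chunk.content}\nMetadata: {metadata}\n"

    class Chunk:
        def __init__(self, content: str, metadata: dict):
            self.content = content
            self.metadata = metadata

    def render_chunk(index: int, chunk: Chunk, template: str = DEFAULT_TEMPLATE) -> str:
        # {index} is the 1-based chunk ordinal, per the deleted schema description;
        # str.format resolves the {chunk.content} attribute reference.
        return template.format(index=index, chunk=chunk, metadata=chunk.metadata)

    def weighted_score(keyword_score: float, vector_score: float, alpha: float = 0.5) -> float:
        # alpha=0 uses only keyword scores, alpha=1 only vector scores,
        # values in between blend the two.
        return alpha * vector_score + (1.0 - alpha) * keyword_score

    def rrf_score(ranks: list[int], impact_factor: float = 60.0) -> float:
        # Fuse 1-based ranks from multiple retrievers; impact_factor must be > 0.
        return sum(1.0 / (impact_factor + rank) for rank in ranks)
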
- RAGSearchMode: - type: string - enum: - - vector - - keyword - - hybrid - title: RAGSearchMode - description: >- - Search modes for RAG query retrieval: - VECTOR: Uses vector similarity search - for semantic matching - KEYWORD: Uses keyword-based search for exact matching - - HYBRID: Combines both vector and keyword search for better results - RRFRanker: - type: object - properties: - type: - type: string - const: rrf - default: rrf - description: The type of ranker, always "rrf" - impact_factor: - type: number - default: 60.0 - description: >- - The impact factor for RRF scoring. Higher values give more weight to higher-ranked - results. Must be greater than 0 - additionalProperties: false - required: - - type - - impact_factor - title: RRFRanker - description: >- - Reciprocal Rank Fusion (RRF) ranker configuration. - Ranker: - oneOf: - - $ref: '#/components/schemas/RRFRanker' - - $ref: '#/components/schemas/WeightedRanker' - discriminator: - propertyName: type - mapping: - rrf: '#/components/schemas/RRFRanker' - weighted: '#/components/schemas/WeightedRanker' - WeightedRanker: - type: object - properties: - type: - type: string - const: weighted - default: weighted - description: The type of ranker, always "weighted" - alpha: - type: number - default: 0.5 - description: >- - Weight factor between 0 and 1. 0 means only use keyword scores, 1 means - only use vector scores, values in between blend both scores. - additionalProperties: false - required: - - type - - alpha - title: WeightedRanker - description: >- - Weighted ranker configuration that combines vector and keyword scores. - QueryRequest: - type: object - properties: - content: - $ref: '#/components/schemas/InterleavedContent' - description: >- - The query content to search for in the indexed documents - vector_db_ids: - type: array - items: - type: string - description: >- - List of vector database IDs to search within - query_config: - $ref: '#/components/schemas/RAGQueryConfig' - description: >- - (Optional) Configuration parameters for the query operation - additionalProperties: false - required: - - content - - vector_db_ids - title: QueryRequest - RAGQueryResult: - type: object - properties: - content: - $ref: '#/components/schemas/InterleavedContent' - description: >- - (Optional) The retrieved content from the query - metadata: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - Additional metadata about the query result - additionalProperties: false - required: - - metadata - title: RAGQueryResult - description: >- - Result of a RAG query containing retrieved content and metadata. ToolGroup: type: object properties: @@ -10195,7 +9872,7 @@ components: enum: - model - shield - - vector_db + - vector_store - dataset - scoring_function - benchmark @@ -11317,7 +10994,7 @@ components: enum: - model - shield - - vector_db + - vector_store - dataset - scoring_function - benchmark @@ -12644,7 +12321,7 @@ components: enum: - model - shield - - vector_db + - vector_store - dataset - scoring_function - benchmark diff --git a/llama_stack/apis/agents/openai_responses.py b/llama_stack/apis/agents/openai_responses.py index 25dc89a6b..821d6a8af 100644 --- a/llama_stack/apis/agents/openai_responses.py +++ b/llama_stack/apis/agents/openai_responses.py @@ -545,6 +545,7 @@ class OpenAIResponseObject(BaseModel): :param tools: (Optional) An array of tools the model may call while generating a response. 
:param truncation: (Optional) Truncation strategy applied to the response :param usage: (Optional) Token usage information for the response + :param instructions: (Optional) System message inserted into the model's context """ created_at: int @@ -564,6 +565,7 @@ class OpenAIResponseObject(BaseModel): tools: list[OpenAIResponseTool] | None = None truncation: str | None = None usage: OpenAIResponseUsage | None = None + instructions: str | None = None @json_schema_type diff --git a/llama_stack/apis/datatypes.py b/llama_stack/apis/datatypes.py index 8fbf21f3e..948ec615f 100644 --- a/llama_stack/apis/datatypes.py +++ b/llama_stack/apis/datatypes.py @@ -121,6 +121,7 @@ class Api(Enum, metaclass=DynamicApiMeta): models = "models" shields = "shields" + vector_stores = "vector_stores" # only used for routing table datasets = "datasets" scoring_functions = "scoring_functions" benchmarks = "benchmarks" diff --git a/llama_stack/apis/resource.py b/llama_stack/apis/resource.py index 7c4130f7d..dafdb28b0 100644 --- a/llama_stack/apis/resource.py +++ b/llama_stack/apis/resource.py @@ -13,7 +13,7 @@ from pydantic import BaseModel, Field class ResourceType(StrEnum): model = "model" shield = "shield" - vector_db = "vector_db" + vector_store = "vector_store" dataset = "dataset" scoring_function = "scoring_function" benchmark = "benchmark" @@ -34,4 +34,4 @@ class Resource(BaseModel): provider_id: str = Field(description="ID of the provider that owns this resource") - type: ResourceType = Field(description="Type of resource (e.g. 'model', 'shield', 'vector_db', etc.)") + type: ResourceType = Field(description="Type of resource (e.g. 'model', 'shield', 'vector_store', etc.)") diff --git a/llama_stack/apis/tools/__init__.py b/llama_stack/apis/tools/__init__.py index b25310ecf..2908d1c62 100644 --- a/llama_stack/apis/tools/__init__.py +++ b/llama_stack/apis/tools/__init__.py @@ -4,5 +4,4 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from .rag_tool import * from .tools import * diff --git a/llama_stack/apis/tools/rag_tool.py b/llama_stack/apis/tools/rag_tool.py deleted file mode 100644 index ed7847e23..000000000 --- a/llama_stack/apis/tools/rag_tool.py +++ /dev/null @@ -1,218 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from enum import Enum, StrEnum -from typing import Annotated, Any, Literal, Protocol - -from pydantic import BaseModel, Field, field_validator -from typing_extensions import runtime_checkable - -from llama_stack.apis.common.content_types import URL, InterleavedContent -from llama_stack.apis.version import LLAMA_STACK_API_V1 -from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol -from llama_stack.schema_utils import json_schema_type, register_schema, webmethod - - -@json_schema_type -class RRFRanker(BaseModel): - """ - Reciprocal Rank Fusion (RRF) ranker configuration. - - :param type: The type of ranker, always "rrf" - :param impact_factor: The impact factor for RRF scoring. Higher values give more weight to higher-ranked results. - Must be greater than 0 - """ - - type: Literal["rrf"] = "rrf" - impact_factor: float = Field(default=60.0, gt=0.0) # default of 60 for optimal performance - - -@json_schema_type -class WeightedRanker(BaseModel): - """ - Weighted ranker configuration that combines vector and keyword scores. 
- - :param type: The type of ranker, always "weighted" - :param alpha: Weight factor between 0 and 1. - 0 means only use keyword scores, - 1 means only use vector scores, - values in between blend both scores. - """ - - type: Literal["weighted"] = "weighted" - alpha: float = Field( - default=0.5, - ge=0.0, - le=1.0, - description="Weight factor between 0 and 1. 0 means only keyword scores, 1 means only vector scores.", - ) - - -Ranker = Annotated[ - RRFRanker | WeightedRanker, - Field(discriminator="type"), -] -register_schema(Ranker, name="Ranker") - - -@json_schema_type -class RAGDocument(BaseModel): - """ - A document to be used for document ingestion in the RAG Tool. - - :param document_id: The unique identifier for the document. - :param content: The content of the document. - :param mime_type: The MIME type of the document. - :param metadata: Additional metadata for the document. - """ - - document_id: str - content: InterleavedContent | URL - mime_type: str | None = None - metadata: dict[str, Any] = Field(default_factory=dict) - - -@json_schema_type -class RAGQueryResult(BaseModel): - """Result of a RAG query containing retrieved content and metadata. - - :param content: (Optional) The retrieved content from the query - :param metadata: Additional metadata about the query result - """ - - content: InterleavedContent | None = None - metadata: dict[str, Any] = Field(default_factory=dict) - - -@json_schema_type -class RAGQueryGenerator(Enum): - """Types of query generators for RAG systems. - - :cvar default: Default query generator using simple text processing - :cvar llm: LLM-based query generator for enhanced query understanding - :cvar custom: Custom query generator implementation - """ - - default = "default" - llm = "llm" - custom = "custom" - - -@json_schema_type -class RAGSearchMode(StrEnum): - """ - Search modes for RAG query retrieval: - - VECTOR: Uses vector similarity search for semantic matching - - KEYWORD: Uses keyword-based search for exact matching - - HYBRID: Combines both vector and keyword search for better results - """ - - VECTOR = "vector" - KEYWORD = "keyword" - HYBRID = "hybrid" - - -@json_schema_type -class DefaultRAGQueryGeneratorConfig(BaseModel): - """Configuration for the default RAG query generator. - - :param type: Type of query generator, always 'default' - :param separator: String separator used to join query terms - """ - - type: Literal["default"] = "default" - separator: str = " " - - -@json_schema_type -class LLMRAGQueryGeneratorConfig(BaseModel): - """Configuration for the LLM-based RAG query generator. - - :param type: Type of query generator, always 'llm' - :param model: Name of the language model to use for query generation - :param template: Template string for formatting the query generation prompt - """ - - type: Literal["llm"] = "llm" - model: str - template: str - - -RAGQueryGeneratorConfig = Annotated[ - DefaultRAGQueryGeneratorConfig | LLMRAGQueryGeneratorConfig, - Field(discriminator="type"), -] -register_schema(RAGQueryGeneratorConfig, name="RAGQueryGeneratorConfig") - - -@json_schema_type -class RAGQueryConfig(BaseModel): - """ - Configuration for the RAG query generation. - - :param query_generator_config: Configuration for the query generator. - :param max_tokens_in_context: Maximum number of tokens in the context. - :param max_chunks: Maximum number of chunks to retrieve. - :param chunk_template: Template for formatting each retrieved chunk in the context. 
- Available placeholders: {index} (1-based chunk ordinal), {chunk.content} (chunk content string), {metadata} (chunk metadata dict). - Default: "Result {index}\\nContent: {chunk.content}\\nMetadata: {metadata}\\n" - :param mode: Search mode for retrieval—either "vector", "keyword", or "hybrid". Default "vector". - :param ranker: Configuration for the ranker to use in hybrid search. Defaults to RRF ranker. - """ - - # This config defines how a query is generated using the messages - # for memory bank retrieval. - query_generator_config: RAGQueryGeneratorConfig = Field(default=DefaultRAGQueryGeneratorConfig()) - max_tokens_in_context: int = 4096 - max_chunks: int = 5 - chunk_template: str = "Result {index}\nContent: {chunk.content}\nMetadata: {metadata}\n" - mode: RAGSearchMode | None = RAGSearchMode.VECTOR - ranker: Ranker | None = Field(default=None) # Only used for hybrid mode - - @field_validator("chunk_template") - def validate_chunk_template(cls, v: str) -> str: - if "{chunk.content}" not in v: - raise ValueError("chunk_template must contain {chunk.content}") - if "{index}" not in v: - raise ValueError("chunk_template must contain {index}") - if len(v) == 0: - raise ValueError("chunk_template must not be empty") - return v - - -@runtime_checkable -@trace_protocol -class RAGToolRuntime(Protocol): - @webmethod(route="/tool-runtime/rag-tool/insert", method="POST", level=LLAMA_STACK_API_V1) - async def insert( - self, - documents: list[RAGDocument], - vector_db_id: str, - chunk_size_in_tokens: int = 512, - ) -> None: - """Index documents so they can be used by the RAG system. - - :param documents: List of documents to index in the RAG system - :param vector_db_id: ID of the vector database to store the document embeddings - :param chunk_size_in_tokens: (Optional) Size in tokens for document chunking during indexing - """ - ... - - @webmethod(route="/tool-runtime/rag-tool/query", method="POST", level=LLAMA_STACK_API_V1) - async def query( - self, - content: InterleavedContent, - vector_db_ids: list[str], - query_config: RAGQueryConfig | None = None, - ) -> RAGQueryResult: - """Query the RAG system for context; typically invoked by the agent. - - :param content: The query content to search for in the indexed documents - :param vector_db_ids: List of vector database IDs to search within - :param query_config: (Optional) Configuration parameters for the query operation - :returns: RAGQueryResult containing the retrieved content and metadata - """ - ... diff --git a/llama_stack/apis/tools/tools.py b/llama_stack/apis/tools/tools.py index b6a1a2543..feac0d33e 100644 --- a/llama_stack/apis/tools/tools.py +++ b/llama_stack/apis/tools/tools.py @@ -4,7 +4,6 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from enum import Enum from typing import Any, Literal, Protocol from pydantic import BaseModel @@ -16,8 +15,6 @@ from llama_stack.apis.version import LLAMA_STACK_API_V1 from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol from llama_stack.schema_utils import json_schema_type, webmethod -from .rag_tool import RAGToolRuntime - @json_schema_type class ToolDef(BaseModel): @@ -181,22 +178,11 @@ class ToolGroups(Protocol): ... -class SpecialToolGroup(Enum): - """Special tool groups with predefined functionality. 
- - :cvar rag_tool: Retrieval-Augmented Generation tool group for document search and retrieval - """ - - rag_tool = "rag_tool" - - @runtime_checkable @trace_protocol class ToolRuntime(Protocol): tool_store: ToolStore | None = None - rag_tool: RAGToolRuntime | None = None - # TODO: This needs to be renamed once OPEN API generator name conflict issue is fixed. @webmethod(route="/tool-runtime/list-tools", method="GET", level=LLAMA_STACK_API_V1) async def list_runtime_tools( diff --git a/llama_stack/apis/vector_io/vector_io.py b/llama_stack/apis/vector_io/vector_io.py index a309c47f9..49e4df039 100644 --- a/llama_stack/apis/vector_io/vector_io.py +++ b/llama_stack/apis/vector_io/vector_io.py @@ -15,7 +15,7 @@ from fastapi import Body from pydantic import BaseModel, Field from llama_stack.apis.inference import InterleavedContent -from llama_stack.apis.vector_dbs import VectorDB +from llama_stack.apis.vector_stores import VectorStore from llama_stack.apis.version import LLAMA_STACK_API_V1 from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id @@ -140,6 +140,7 @@ class VectorStoreFileCounts(BaseModel): total: int +# TODO: rename this as OpenAIVectorStore @json_schema_type class VectorStoreObject(BaseModel): """OpenAI Vector Store object. @@ -517,17 +518,18 @@ class OpenAICreateVectorStoreFileBatchRequestWithExtraBody(BaseModel, extra="all chunking_strategy: VectorStoreChunkingStrategy | None = None -class VectorDBStore(Protocol): - def get_vector_db(self, vector_db_id: str) -> VectorDB | None: ... +class VectorStoreTable(Protocol): + def get_vector_store(self, vector_store_id: str) -> VectorStore | None: ... @runtime_checkable @trace_protocol class VectorIO(Protocol): - vector_db_store: VectorDBStore | None = None + vector_store_table: VectorStoreTable | None = None # this will just block now until chunks are inserted, but it should # probably return a Job instance which can be polled for completion + # TODO: rename vector_db_id to vector_store_id once Stainless is working @webmethod(route="/vector-io/insert", method="POST", level=LLAMA_STACK_API_V1) async def insert_chunks( self, @@ -546,6 +548,7 @@ class VectorIO(Protocol): """ ... + # TODO: rename vector_db_id to vector_store_id once Stainless is working @webmethod(route="/vector-io/query", method="POST", level=LLAMA_STACK_API_V1) async def query_chunks( self, diff --git a/llama_stack/apis/vector_dbs/__init__.py b/llama_stack/apis/vector_stores/__init__.py similarity index 87% rename from llama_stack/apis/vector_dbs/__init__.py rename to llama_stack/apis/vector_stores/__init__.py index af34ba9d4..8fc34058a 100644 --- a/llama_stack/apis/vector_dbs/__init__.py +++ b/llama_stack/apis/vector_stores/__init__.py @@ -4,4 +4,4 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-from .vector_dbs import * +from .vector_stores import * diff --git a/llama_stack/apis/vector_dbs/vector_dbs.py b/llama_stack/apis/vector_stores/vector_stores.py similarity index 56% rename from llama_stack/apis/vector_dbs/vector_dbs.py rename to llama_stack/apis/vector_stores/vector_stores.py index 53bf181e9..524624028 100644 --- a/llama_stack/apis/vector_dbs/vector_dbs.py +++ b/llama_stack/apis/vector_stores/vector_stores.py @@ -9,53 +9,43 @@ from typing import Literal from pydantic import BaseModel from llama_stack.apis.resource import Resource, ResourceType -from llama_stack.schema_utils import json_schema_type -@json_schema_type -class VectorDB(Resource): +# Internal resource type for storing the vector store routing and other information +class VectorStore(Resource): """Vector database resource for storing and querying vector embeddings. - :param type: Type of resource, always 'vector_db' for vector databases + :param type: Type of resource, always 'vector_store' for vector stores :param embedding_model: Name of the embedding model to use for vector generation :param embedding_dimension: Dimension of the embedding vectors """ - type: Literal[ResourceType.vector_db] = ResourceType.vector_db + type: Literal[ResourceType.vector_store] = ResourceType.vector_store embedding_model: str embedding_dimension: int - vector_db_name: str | None = None + vector_store_name: str | None = None @property - def vector_db_id(self) -> str: + def vector_store_id(self) -> str: return self.identifier @property - def provider_vector_db_id(self) -> str | None: + def provider_vector_store_id(self) -> str | None: return self.provider_resource_id -class VectorDBInput(BaseModel): +class VectorStoreInput(BaseModel): """Input parameters for creating or configuring a vector database. - :param vector_db_id: Unique identifier for the vector database + :param vector_store_id: Unique identifier for the vector store :param embedding_model: Name of the embedding model to use for vector generation :param embedding_dimension: Dimension of the embedding vectors - :param provider_vector_db_id: (Optional) Provider-specific identifier for the vector database + :param provider_vector_store_id: (Optional) Provider-specific identifier for the vector store """ - vector_db_id: str + vector_store_id: str embedding_model: str embedding_dimension: int provider_id: str | None = None - provider_vector_db_id: str | None = None - - -class ListVectorDBsResponse(BaseModel): - """Response from listing vector databases. - - :param data: List of vector databases - """ - - data: list[VectorDB] + provider_vector_store_id: str | None = None diff --git a/llama_stack/cli/stack/_build.py b/llama_stack/cli/stack/_build.py deleted file mode 100644 index 471d5cb66..000000000 --- a/llama_stack/cli/stack/_build.py +++ /dev/null @@ -1,490 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
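The vector_dbs to vector_stores rename above is mechanical but wide: the ResourceType enum value, the resource model, and its accessor properties all change names, so any code that pattern-matched on 'vector_db' must be updated. A minimal sketch of the renamed model as it is defined in this diff, with illustrative field values (the identifier, provider, and embedding model names are hypothetical):

    from llama_stack.apis.vector_stores import VectorStore

    store = VectorStore(
        identifier="vs_123",                      # hypothetical store id
        provider_id="faiss",                      # hypothetical provider
        provider_resource_id="vs_123",
        embedding_model="nomic-embed-text-v1.5",  # hypothetical embedding model
        embedding_dimension=768,
    )
    assert store.vector_store_id == "vs_123"      # property renamed from vector_db_id
    assert store.type == "vector_store"           # resource type renamed from 'vector_db'
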
- -import argparse -import importlib.resources -import json -import os -import shutil -import sys -import textwrap -from functools import lru_cache -from importlib.abc import Traversable -from pathlib import Path - -import yaml -from prompt_toolkit import prompt -from prompt_toolkit.completion import WordCompleter -from prompt_toolkit.validation import Validator -from termcolor import colored, cprint - -from llama_stack.cli.stack.utils import ImageType -from llama_stack.cli.table import print_table -from llama_stack.core.build import ( - SERVER_DEPENDENCIES, - build_image, - get_provider_dependencies, -) -from llama_stack.core.configure import parse_and_maybe_upgrade_config -from llama_stack.core.datatypes import ( - BuildConfig, - BuildProvider, - DistributionSpec, - Provider, - StackRunConfig, -) -from llama_stack.core.distribution import get_provider_registry -from llama_stack.core.external import load_external_apis -from llama_stack.core.resolver import InvalidProviderError -from llama_stack.core.stack import replace_env_vars -from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR, EXTERNAL_PROVIDERS_DIR -from llama_stack.core.utils.dynamic import instantiate_class_type -from llama_stack.core.utils.exec import formulate_run_args, run_command -from llama_stack.core.utils.image_types import LlamaStackImageType -from llama_stack.providers.datatypes import Api -from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig - -DISTRIBS_PATH = Path(__file__).parent.parent.parent / "distributions" - - -@lru_cache -def available_distros_specs() -> dict[str, BuildConfig]: - import yaml - - distro_specs = {} - for p in DISTRIBS_PATH.rglob("*build.yaml"): - distro_name = p.parent.name - with open(p) as f: - build_config = BuildConfig(**yaml.safe_load(f)) - distro_specs[distro_name] = build_config - return distro_specs - - -def run_stack_build_command(args: argparse.Namespace) -> None: - if args.list_distros: - return _run_distro_list_cmd() - - if args.image_type == ImageType.VENV.value: - current_venv = os.environ.get("VIRTUAL_ENV") - image_name = args.image_name or current_venv - else: - image_name = args.image_name - - if args.template: - cprint( - "The --template argument is deprecated. Please use --distro instead.", - color="red", - file=sys.stderr, - ) - distro_name = args.template - else: - distro_name = args.distribution - - if distro_name: - available_distros = available_distros_specs() - if distro_name not in available_distros: - cprint( - f"Could not find distribution {distro_name}. Please run `llama stack build --list-distros` to check out the available distributions", - color="red", - file=sys.stderr, - ) - sys.exit(1) - build_config = available_distros[distro_name] - if args.image_type: - build_config.image_type = args.image_type - else: - cprint( - f"Please specify a image-type ({' | '.join(e.value for e in ImageType)}) for {distro_name}", - color="red", - file=sys.stderr, - ) - sys.exit(1) - elif args.providers: - provider_list: dict[str, list[BuildProvider]] = dict() - for api_provider in args.providers.split(","): - if "=" not in api_provider: - cprint( - "Could not parse `--providers`. 
Please ensure the list is in the format api1=provider1,api2=provider2", - color="red", - file=sys.stderr, - ) - sys.exit(1) - api, provider_type = api_provider.split("=") - providers_for_api = get_provider_registry().get(Api(api), None) - if providers_for_api is None: - cprint( - f"{api} is not a valid API.", - color="red", - file=sys.stderr, - ) - sys.exit(1) - if provider_type in providers_for_api: - provider = BuildProvider( - provider_type=provider_type, - module=None, - ) - provider_list.setdefault(api, []).append(provider) - else: - cprint( - f"{provider} is not a valid provider for the {api} API.", - color="red", - file=sys.stderr, - ) - sys.exit(1) - distribution_spec = DistributionSpec( - providers=provider_list, - description=",".join(args.providers), - ) - if not args.image_type: - cprint( - f"Please specify a image-type (container | venv) for {args.template}", - color="red", - file=sys.stderr, - ) - sys.exit(1) - - build_config = BuildConfig(image_type=args.image_type, distribution_spec=distribution_spec) - elif not args.config and not distro_name: - name = prompt( - "> Enter a name for your Llama Stack (e.g. my-local-stack): ", - validator=Validator.from_callable( - lambda x: len(x) > 0, - error_message="Name cannot be empty, please enter a name", - ), - ) - - image_type = prompt( - "> Enter the image type you want your Llama Stack to be built as (use to see options): ", - completer=WordCompleter([e.value for e in ImageType]), - complete_while_typing=True, - validator=Validator.from_callable( - lambda x: x in [e.value for e in ImageType], - error_message="Invalid image type. Use to see options", - ), - ) - - image_name = f"llamastack-{name}" - - cprint( - textwrap.dedent( - """ - Llama Stack is composed of several APIs working together. Let's select - the provider types (implementations) you want to use for these APIs. 
- """, - ), - color="green", - file=sys.stderr, - ) - - cprint("Tip: use to see options for the providers.\n", color="green", file=sys.stderr) - - providers: dict[str, list[BuildProvider]] = dict() - for api, providers_for_api in get_provider_registry().items(): - available_providers = [x for x in providers_for_api.keys() if x not in ("remote", "remote::sample")] - if not available_providers: - continue - api_provider = prompt( - f"> Enter provider for API {api.value}: ", - completer=WordCompleter(available_providers), - complete_while_typing=True, - validator=Validator.from_callable( - lambda x: x in available_providers, # noqa: B023 - see https://github.com/astral-sh/ruff/issues/7847 - error_message="Invalid provider, use to see options", - ), - ) - - string_providers = api_provider.split(" ") - - for provider in string_providers: - providers.setdefault(api.value, []).append(BuildProvider(provider_type=provider)) - - description = prompt( - "\n > (Optional) Enter a short description for your Llama Stack: ", - default="", - ) - - distribution_spec = DistributionSpec( - providers=providers, - description=description, - ) - - build_config = BuildConfig(image_type=image_type, distribution_spec=distribution_spec) - else: - with open(args.config) as f: - try: - contents = yaml.safe_load(f) - contents = replace_env_vars(contents) - build_config = BuildConfig(**contents) - if args.image_type: - build_config.image_type = args.image_type - except Exception as e: - cprint( - f"Could not parse config file {args.config}: {e}", - color="red", - file=sys.stderr, - ) - sys.exit(1) - - if args.print_deps_only: - print(f"# Dependencies for {distro_name or args.config or image_name}") - normal_deps, special_deps, external_provider_dependencies = get_provider_dependencies(build_config) - normal_deps += SERVER_DEPENDENCIES - print(f"uv pip install {' '.join(normal_deps)}") - for special_dep in special_deps: - print(f"uv pip install {special_dep}") - for external_dep in external_provider_dependencies: - print(f"uv pip install {external_dep}") - return - - try: - run_config = _run_stack_build_command_from_build_config( - build_config, - image_name=image_name, - config_path=args.config, - distro_name=distro_name, - ) - - except (Exception, RuntimeError) as exc: - import traceback - - cprint( - f"Error building stack: {exc}", - color="red", - file=sys.stderr, - ) - cprint("Stack trace:", color="red", file=sys.stderr) - traceback.print_exc() - sys.exit(1) - - if run_config is None: - cprint( - "Run config path is empty", - color="red", - file=sys.stderr, - ) - sys.exit(1) - - if args.run: - config_dict = yaml.safe_load(run_config.read_text()) - config = parse_and_maybe_upgrade_config(config_dict) - if config.external_providers_dir and not config.external_providers_dir.exists(): - config.external_providers_dir.mkdir(exist_ok=True) - run_args = formulate_run_args(args.image_type, image_name or config.image_name) - run_args.extend([str(os.getenv("LLAMA_STACK_PORT", 8321)), "--config", str(run_config)]) - run_command(run_args) - - -def _generate_run_config( - build_config: BuildConfig, - build_dir: Path, - image_name: str, -) -> Path: - """ - Generate a run.yaml template file for user to edit from a build.yaml file - """ - apis = list(build_config.distribution_spec.providers.keys()) - run_config = StackRunConfig( - container_image=(image_name if build_config.image_type == LlamaStackImageType.CONTAINER.value else None), - image_name=image_name, - apis=apis, - providers={}, - 
external_providers_dir=build_config.external_providers_dir - if build_config.external_providers_dir - else EXTERNAL_PROVIDERS_DIR, - ) - if not run_config.inference_store: - run_config.inference_store = SqliteSqlStoreConfig( - **SqliteSqlStoreConfig.sample_run_config( - __distro_dir__=(DISTRIBS_BASE_DIR / image_name).as_posix(), db_name="inference_store.db" - ) - ) - # build providers dict - provider_registry = get_provider_registry(build_config) - for api in apis: - run_config.providers[api] = [] - providers = build_config.distribution_spec.providers[api] - - for provider in providers: - pid = provider.provider_type.split("::")[-1] - - p = provider_registry[Api(api)][provider.provider_type] - if p.deprecation_error: - raise InvalidProviderError(p.deprecation_error) - - try: - config_type = instantiate_class_type(provider_registry[Api(api)][provider.provider_type].config_class) - except (ModuleNotFoundError, ValueError) as exc: - # HACK ALERT: - # This code executes after building is done, the import cannot work since the - # package is either available in the venv or container - not available on the host. - # TODO: use a "is_external" flag in ProviderSpec to check if the provider is - # external - cprint( - f"Failed to import provider {provider.provider_type} for API {api} - assuming it's external, skipping: {exc}", - color="yellow", - file=sys.stderr, - ) - # Set config_type to None to avoid UnboundLocalError - config_type = None - - if config_type is not None and hasattr(config_type, "sample_run_config"): - config = config_type.sample_run_config(__distro_dir__=f"~/.llama/distributions/{image_name}") - else: - config = {} - - p_spec = Provider( - provider_id=pid, - provider_type=provider.provider_type, - config=config, - module=provider.module, - ) - run_config.providers[api].append(p_spec) - - run_config_file = build_dir / f"{image_name}-run.yaml" - - with open(run_config_file, "w") as f: - to_write = json.loads(run_config.model_dump_json()) - f.write(yaml.dump(to_write, sort_keys=False)) - - # Only print this message for non-container builds since it will be displayed before the - # container is built - # For non-container builds, the run.yaml is generated at the very end of the build process so it - # makes sense to display this message - if build_config.image_type != LlamaStackImageType.CONTAINER.value: - cprint(f"You can now run your stack with `llama stack run {run_config_file}`", color="green", file=sys.stderr) - return run_config_file - - -def _run_stack_build_command_from_build_config( - build_config: BuildConfig, - image_name: str | None = None, - distro_name: str | None = None, - config_path: str | None = None, -) -> Path | Traversable: - image_name = image_name or build_config.image_name - if build_config.image_type == LlamaStackImageType.CONTAINER.value: - if distro_name: - image_name = f"distribution-{distro_name}" - else: - if not image_name: - raise ValueError("Please specify an image name when building a container image without a template") - else: - if not image_name and os.environ.get("UV_SYSTEM_PYTHON"): - image_name = "__system__" - if not image_name: - raise ValueError("Please specify an image name when building a venv image") - - # At this point, image_name should be guaranteed to be a string - if image_name is None: - raise ValueError("image_name should not be None after validation") - - if distro_name: - build_dir = DISTRIBS_BASE_DIR / distro_name - build_file_path = build_dir / f"{distro_name}-build.yaml" - else: - if image_name is None: - raise 
ValueError("image_name cannot be None") - build_dir = DISTRIBS_BASE_DIR / image_name - build_file_path = build_dir / f"{image_name}-build.yaml" - - os.makedirs(build_dir, exist_ok=True) - run_config_file = None - # Generate the run.yaml so it can be included in the container image with the proper entrypoint - # Only do this if we're building a container image and we're not using a template - if build_config.image_type == LlamaStackImageType.CONTAINER.value and not distro_name and config_path: - cprint("Generating run.yaml file", color="yellow", file=sys.stderr) - run_config_file = _generate_run_config(build_config, build_dir, image_name) - - with open(build_file_path, "w") as f: - to_write = json.loads(build_config.model_dump_json(exclude_none=True)) - f.write(yaml.dump(to_write, sort_keys=False)) - - # We first install the external APIs so that the build process can use them and discover the - # providers dependencies - if build_config.external_apis_dir: - cprint("Installing external APIs", color="yellow", file=sys.stderr) - external_apis = load_external_apis(build_config) - if external_apis: - # install the external APIs - packages = [] - for _, api_spec in external_apis.items(): - if api_spec.pip_packages: - packages.extend(api_spec.pip_packages) - cprint( - f"Installing {api_spec.name} with pip packages {api_spec.pip_packages}", - color="yellow", - file=sys.stderr, - ) - return_code = run_command(["uv", "pip", "install", *packages]) - if return_code != 0: - packages_str = ", ".join(packages) - raise RuntimeError( - f"Failed to install external APIs packages: {packages_str} (return code: {return_code})" - ) - - return_code = build_image( - build_config, - image_name, - distro_or_config=distro_name or config_path or str(build_file_path), - run_config=run_config_file.as_posix() if run_config_file else None, - ) - if return_code != 0: - raise RuntimeError(f"Failed to build image {image_name}") - - if distro_name: - # copy run.yaml from distribution to build_dir instead of generating it again - distro_path = importlib.resources.files("llama_stack") / f"distributions/{distro_name}/run.yaml" - run_config_file = build_dir / f"{distro_name}-run.yaml" - - with importlib.resources.as_file(distro_path) as path: - shutil.copy(path, run_config_file) - - cprint("Build Successful!", color="green", file=sys.stderr) - cprint(f"You can find the newly-built distribution here: {run_config_file}", color="blue", file=sys.stderr) - if build_config.image_type == LlamaStackImageType.VENV: - cprint( - "You can run the new Llama Stack distro (after activating " - + colored(image_name, "cyan") - + ") via: " - + colored(f"llama stack run {run_config_file}", "blue"), - color="green", - file=sys.stderr, - ) - elif build_config.image_type == LlamaStackImageType.CONTAINER: - cprint( - "You can run the container with: " - + colored( - f"docker run -p 8321:8321 -v ~/.llama:/root/.llama localhost/{image_name} --port 8321", "blue" - ), - color="green", - file=sys.stderr, - ) - return distro_path - else: - return _generate_run_config(build_config, build_dir, image_name) - - -def _run_distro_list_cmd() -> None: - headers = [ - "Distribution Name", - # "Providers", - "Description", - ] - - rows = [] - for distro_name, spec in available_distros_specs().items(): - rows.append( - [ - distro_name, - # json.dumps(spec.distribution_spec.providers, indent=2), - spec.distribution_spec.description, - ] - ) - print_table( - rows, - headers, - separate_rows=True, - ) diff --git a/llama_stack/cli/stack/build.py 
diff --git a/llama_stack/cli/stack/build.py b/llama_stack/cli/stack/build.py deleted file mode 100644 index cbe8ed881..000000000 --- a/llama_stack/cli/stack/build.py +++ /dev/null @@ -1,106 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. -import argparse -import textwrap - -from llama_stack.cli.stack.utils import ImageType -from llama_stack.cli.subcommand import Subcommand -from llama_stack.log import get_logger - -logger = get_logger(__name__, category="cli") - - -class StackBuild(Subcommand): - def __init__(self, subparsers: argparse._SubParsersAction): - super().__init__() - self.parser = subparsers.add_parser( - "build", - prog="llama stack build", - description="[DEPRECATED] Build a Llama stack container. This command is deprecated and will be removed in a future release. Use `llama stack list-deps <distro>' instead.", - formatter_class=argparse.ArgumentDefaultsHelpFormatter, - ) - self._add_arguments() - self.parser.set_defaults(func=self._run_stack_build_command) - - def _add_arguments(self): - self.parser.add_argument( - "--config", - type=str, - default=None, - help="Path to a config file to use for the build. You can find example configs in llama_stack.cores/**/build.yaml. If this argument is not provided, you will be prompted to enter information interactively", - ) - - self.parser.add_argument( - "--template", - type=str, - default=None, - help="""(deprecated) Name of the example template config to use for build. You may use `llama stack build --list-distros` to check out the available distributions""", - ) - self.parser.add_argument( - "--distro", - "--distribution", - dest="distribution", - type=str, - default=None, - help="""Name of the distribution to use for build. You may use `llama stack build --list-distros` to check out the available distributions""", - ) - - self.parser.add_argument( - "--list-distros", - "--list-distributions", - action="store_true", - dest="list_distros", - default=False, - help="Show the available distributions for building a Llama Stack distribution", - ) - - self.parser.add_argument( - "--image-type", - type=str, - help="Image Type to use for the build. If not specified, will use the image type from the template config.", - choices=[e.value for e in ImageType], - default=None, # no default so we can detect if a user specified --image-type and override image_type in the config - ) - - self.parser.add_argument( - "--image-name", - type=str, - help=textwrap.dedent( - f"""[for image-type={"|".join(e.value for e in ImageType)}] Name of the virtual environment to use for -the build. If not specified, currently active environment will be used if found. - """ - ), - default=None, - ) - self.parser.add_argument( - "--print-deps-only", - default=False, - action="store_true", - help="Print the dependencies for the stack only, without building the stack", - ) - - self.parser.add_argument( - "--run", - action="store_true", - default=False, - help="Run the stack after building using the same image type, name, and other applicable arguments", - ) - self.parser.add_argument( - "--providers", - type=str, - default=None, - help="Build a config for a list of providers and only those providers. This list is formatted like: api1=provider1,api2=provider2. 
Where there can be multiple providers per API.", - ) - - def _run_stack_build_command(self, args: argparse.Namespace) -> None: - logger.warning( - "The 'llama stack build' command is deprecated and will be removed in a future release. Please use 'llama stack list-deps'" - ) - # always keep implementation completely silo-ed away from CLI so CLI - # can be fast to load and reduces dependencies - from ._build import run_stack_build_command - - return run_stack_build_command(args) diff --git a/llama_stack/cli/stack/stack.py b/llama_stack/cli/stack/stack.py index fd0a4edf5..351da972f 100644 --- a/llama_stack/cli/stack/stack.py +++ b/llama_stack/cli/stack/stack.py @@ -11,7 +11,6 @@ from llama_stack.cli.stack.list_stacks import StackListBuilds from llama_stack.cli.stack.utils import print_subcommand_description from llama_stack.cli.subcommand import Subcommand -from .build import StackBuild from .list_apis import StackListApis from .list_deps import StackListDeps from .list_providers import StackListProviders @@ -41,7 +40,6 @@ class StackParser(Subcommand): # Add sub-commands StackListDeps.create(subparsers) - StackBuild.create(subparsers) StackListApis.create(subparsers) StackListProviders.create(subparsers) StackRun.create(subparsers) diff --git a/llama_stack/cli/stack/utils.py b/llama_stack/cli/stack/utils.py index 4d4c1b538..cc1ca051b 100644 --- a/llama_stack/cli/stack/utils.py +++ b/llama_stack/cli/stack/utils.py @@ -17,10 +17,19 @@ from llama_stack.core.datatypes import ( BuildConfig, Provider, StackRunConfig, + StorageConfig, ) from llama_stack.core.distribution import get_provider_registry from llama_stack.core.resolver import InvalidProviderError -from llama_stack.core.utils.config_dirs import EXTERNAL_PROVIDERS_DIR +from llama_stack.core.storage.datatypes import ( + InferenceStoreReference, + KVStoreReference, + ServerStoresConfig, + SqliteKVStoreConfig, + SqliteSqlStoreConfig, + SqlStoreReference, +) +from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR, EXTERNAL_PROVIDERS_DIR from llama_stack.core.utils.dynamic import instantiate_class_type from llama_stack.core.utils.image_types import LlamaStackImageType from llama_stack.providers.datatypes import Api @@ -51,11 +60,23 @@ def generate_run_config( Generate a run.yaml template file for user to edit from a build.yaml file """ apis = list(build_config.distribution_spec.providers.keys()) + distro_dir = DISTRIBS_BASE_DIR / image_name run_config = StackRunConfig( container_image=(image_name if build_config.image_type == LlamaStackImageType.CONTAINER.value else None), image_name=image_name, apis=apis, providers={}, + storage=StorageConfig( + backends={ + "kv_default": SqliteKVStoreConfig(db_path=str(distro_dir / "kvstore.db")), + "sql_default": SqliteSqlStoreConfig(db_path=str(distro_dir / "sql_store.db")), + }, + stores=ServerStoresConfig( + metadata=KVStoreReference(backend="kv_default", namespace="registry"), + inference=InferenceStoreReference(backend="sql_default", table_name="inference_store"), + conversations=SqlStoreReference(backend="sql_default", table_name="openai_conversations"), + ), + ), external_providers_dir=build_config.external_providers_dir if build_config.external_providers_dir else EXTERNAL_PROVIDERS_DIR, diff --git a/llama_stack/core/access_control/datatypes.py b/llama_stack/core/access_control/datatypes.py index c833ed51b..84beb8e15 100644 --- a/llama_stack/core/access_control/datatypes.py +++ b/llama_stack/core/access_control/datatypes.py @@ -41,7 +41,7 @@ class AccessRule(BaseModel): A rule defines a list 
of action either to permit or to forbid. It may specify a principal or a resource that must match for the rule to take effect. The resource to match should be specified in the form of a type qualified identifier, e.g. - model::my-model or vector_db::some-db, or a wildcard for all resources of a type, + model::my-model or vector_store::some-db, or a wildcard for all resources of a type, e.g. model::*. If the principal or resource are not specified, they will match all requests. @@ -79,9 +79,9 @@ class AccessRule(BaseModel): description: any user has read access to any resource created by a member of their team - forbid: actions: [create, read, delete] - resource: vector_db::* + resource: vector_store::* unless: user with admin in roles - description: only user with admin role can use vector_db resources + description: only user with admin role can use vector_store resources """ diff --git a/llama_stack/core/build_container.sh b/llama_stack/core/build_container.sh deleted file mode 100755 index 03ed846d9..000000000 --- a/llama_stack/core/build_container.sh +++ /dev/null @@ -1,410 +0,0 @@ -#!/usr/bin/env bash - -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -LLAMA_STACK_DIR=${LLAMA_STACK_DIR:-} -LLAMA_STACK_CLIENT_DIR=${LLAMA_STACK_CLIENT_DIR:-} - -TEST_PYPI_VERSION=${TEST_PYPI_VERSION:-} -PYPI_VERSION=${PYPI_VERSION:-} -BUILD_PLATFORM=${BUILD_PLATFORM:-} -# This timeout (in seconds) is necessary when installing PyTorch via uv since it's likely to time out -# Reference: https://github.com/astral-sh/uv/pull/1694 -UV_HTTP_TIMEOUT=${UV_HTTP_TIMEOUT:-500} - -# mounting is not supported by docker buildx, so we use COPY instead -USE_COPY_NOT_MOUNT=${USE_COPY_NOT_MOUNT:-} -# Path to the run.yaml file in the container -RUN_CONFIG_PATH=/app/run.yaml - -BUILD_CONTEXT_DIR=$(pwd) - -set -euo pipefail - -# Define color codes -RED='\033[0;31m' -NC='\033[0m' # No Color - -# Usage function -usage() { - echo "Usage: $0 --image-name <image_name> --container-base <container_base> --normal-deps <pip_dependencies> [--run-config <run_config>] [--external-provider-deps <external_provider_deps>] [--optional-deps <special_pip_deps>]" - echo "Example: $0 --image-name llama-stack-img --container-base python:3.12-slim --normal-deps 'numpy pandas' --run-config ./run.yaml --external-provider-deps 'foo' --optional-deps 'bar'" - exit 1 -} - -# Parse arguments -image_name="" -container_base="" -normal_deps="" -external_provider_deps="" -optional_deps="" -run_config="" -distro_or_config="" - -while [[ $# -gt 0 ]]; do - key="$1" - case "$key" in - --image-name) - if [[ -z "$2" || "$2" == --* ]]; then - echo "Error: --image-name requires a string value" >&2 - usage - fi - image_name="$2" - shift 2 - ;; - --container-base) - if [[ -z "$2" || "$2" == --* ]]; then - echo "Error: --container-base requires a string value" >&2 - usage - fi - container_base="$2" - shift 2 - ;; - --normal-deps) - if [[ -z "$2" || "$2" == --* ]]; then - echo "Error: --normal-deps requires a string value" >&2 - usage - fi - normal_deps="$2" - shift 2 - ;; - --external-provider-deps) - if [[ -z "$2" || "$2" == --* ]]; then - echo "Error: --external-provider-deps requires a string value" >&2 - usage - fi - external_provider_deps="$2" - shift 2 - ;; - --optional-deps) - if [[ -z "$2" || "$2" == --* ]]; then - echo "Error: --optional-deps requires a string value" >&2 - usage - fi - optional_deps="$2" - shift 2 - ;; - --run-config) - if [[ -z "$2" || "$2" == --* ]]; then - echo "Error: --run-config 
requires a string value" >&2 - usage - fi - run_config="$2" - shift 2 - ;; - --distro-or-config) - if [[ -z "$2" || "$2" == --* ]]; then - echo "Error: --distro-or-config requires a string value" >&2 - usage - fi - distro_or_config="$2" - shift 2 - ;; - *) - echo "Unknown option: $1" >&2 - usage - ;; - esac -done - -# Check required arguments -if [[ -z "$image_name" || -z "$container_base" || -z "$normal_deps" ]]; then - echo "Error: --image-name, --container-base, and --normal-deps are required." >&2 - usage -fi - -CONTAINER_BINARY=${CONTAINER_BINARY:-docker} -CONTAINER_OPTS=${CONTAINER_OPTS:---progress=plain} -TEMP_DIR=$(mktemp -d) -SCRIPT_DIR=$(dirname "$(readlink -f "$0")") -source "$SCRIPT_DIR/common.sh" - -add_to_container() { - output_file="$TEMP_DIR/Containerfile" - if [ -t 0 ]; then - printf '%s\n' "$1" >>"$output_file" - else - cat >>"$output_file" - fi -} - -if ! is_command_available "$CONTAINER_BINARY"; then - printf "${RED}Error: ${CONTAINER_BINARY} command not found. Is ${CONTAINER_BINARY} installed and in your PATH?${NC}" >&2 - exit 1 -fi - -if [[ $container_base == *"registry.access.redhat.com/ubi9"* ]]; then - add_to_container << EOF -FROM $container_base -WORKDIR /app - -# We install the Python 3.12 dev headers and build tools so that any -# C-extension wheels (e.g. polyleven, faiss-cpu) can compile successfully. - -RUN dnf -y update && dnf install -y iputils git net-tools wget \ - vim-minimal python3.12 python3.12-pip python3.12-wheel \ - python3.12-setuptools python3.12-devel gcc gcc-c++ make && \ - ln -s /bin/pip3.12 /bin/pip && ln -s /bin/python3.12 /bin/python && dnf clean all - -ENV UV_SYSTEM_PYTHON=1 -RUN pip install uv -EOF -else - add_to_container << EOF -FROM $container_base -WORKDIR /app - -RUN apt-get update && apt-get install -y \ - iputils-ping net-tools iproute2 dnsutils telnet \ - curl wget telnet git\ - procps psmisc lsof \ - traceroute \ - bubblewrap \ - gcc g++ \ - && rm -rf /var/lib/apt/lists/* - -ENV UV_SYSTEM_PYTHON=1 -RUN pip install uv -EOF -fi - -# Add pip dependencies first since llama-stack is what will change most often -# so we can reuse layers. -if [ -n "$normal_deps" ]; then - read -ra pip_args <<< "$normal_deps" - quoted_deps=$(printf " %q" "${pip_args[@]}") - add_to_container << EOF -RUN uv pip install --no-cache $quoted_deps -EOF -fi - -if [ -n "$optional_deps" ]; then - IFS='#' read -ra parts <<<"$optional_deps" - for part in "${parts[@]}"; do - read -ra pip_args <<< "$part" - quoted_deps=$(printf " %q" "${pip_args[@]}") - add_to_container <=')[0].split('<=')[0].split('!=')[0].split('<')[0].split('>')[0] - module = importlib.import_module(f'{package_name}.provider') - spec = module.get_provider_spec() - if hasattr(spec, 'pip_packages') and spec.pip_packages: - if isinstance(spec.pip_packages, (list, tuple)): - print('\n'.join(spec.pip_packages)) -except Exception as e: - print(f'Error getting provider spec for {package_name}: {e}', file=sys.stderr) -PYTHON -EOF - done -fi - -get_python_cmd() { - if is_command_available python; then - echo "python" - elif is_command_available python3; then - echo "python3" - else - echo "Error: Neither python nor python3 is installed. Please install Python to continue." 
>&2 - exit 1 - fi -} - -if [ -n "$run_config" ]; then - # Copy the run config to the build context since it's an absolute path - cp "$run_config" "$BUILD_CONTEXT_DIR/run.yaml" - - # Parse the run.yaml configuration to identify external provider directories - # If external providers are specified, copy their directory to the container - # and update the configuration to reference the new container path - python_cmd=$(get_python_cmd) - external_providers_dir=$($python_cmd -c "import yaml; config = yaml.safe_load(open('$run_config')); print(config.get('external_providers_dir') or '')") - external_providers_dir=$(eval echo "$external_providers_dir") - if [ -n "$external_providers_dir" ]; then - if [ -d "$external_providers_dir" ]; then - echo "Copying external providers directory: $external_providers_dir" - cp -r "$external_providers_dir" "$BUILD_CONTEXT_DIR/providers.d" - add_to_container << EOF -COPY providers.d /.llama/providers.d -EOF - fi - - # Edit the run.yaml file to change the external_providers_dir to /.llama/providers.d - if [ "$(uname)" = "Darwin" ]; then - sed -i.bak -e 's|external_providers_dir:.*|external_providers_dir: /.llama/providers.d|' "$BUILD_CONTEXT_DIR/run.yaml" - rm -f "$BUILD_CONTEXT_DIR/run.yaml.bak" - else - sed -i 's|external_providers_dir:.*|external_providers_dir: /.llama/providers.d|' "$BUILD_CONTEXT_DIR/run.yaml" - fi - fi - - # Copy run config into docker image - add_to_container << EOF -COPY run.yaml $RUN_CONFIG_PATH -EOF -fi - -stack_mount="/app/llama-stack-source" -client_mount="/app/llama-stack-client-source" - -install_local_package() { - local dir="$1" - local mount_point="$2" - local name="$3" - - if [ ! -d "$dir" ]; then - echo "${RED}Warning: $name is set but directory does not exist: $dir${NC}" >&2 - exit 1 - fi - - if [ "$USE_COPY_NOT_MOUNT" = "true" ]; then - add_to_container << EOF -COPY $dir $mount_point -EOF - fi - add_to_container << EOF -RUN uv pip install --no-cache -e $mount_point -EOF -} - - -if [ -n "$LLAMA_STACK_CLIENT_DIR" ]; then - install_local_package "$LLAMA_STACK_CLIENT_DIR" "$client_mount" "LLAMA_STACK_CLIENT_DIR" -fi - -if [ -n "$LLAMA_STACK_DIR" ]; then - install_local_package "$LLAMA_STACK_DIR" "$stack_mount" "LLAMA_STACK_DIR" -else - if [ -n "$TEST_PYPI_VERSION" ]; then - # these packages are damaged in test-pypi, so install them first - add_to_container << EOF -RUN uv pip install --no-cache fastapi libcst -EOF - add_to_container << EOF -RUN uv pip install --no-cache --extra-index-url https://test.pypi.org/simple/ \ - --index-strategy unsafe-best-match \ - llama-stack==$TEST_PYPI_VERSION - -EOF - else - if [ -n "$PYPI_VERSION" ]; then - SPEC_VERSION="llama-stack==${PYPI_VERSION}" - else - SPEC_VERSION="llama-stack" - fi - add_to_container << EOF -RUN uv pip install --no-cache $SPEC_VERSION -EOF - fi -fi - -# remove uv after installation - add_to_container << EOF -RUN pip uninstall -y uv -EOF - -# If a run config is provided, we use the llama stack CLI -if [[ -n "$run_config" ]]; then - add_to_container << EOF -ENTRYPOINT ["llama", "stack", "run", "$RUN_CONFIG_PATH"] -EOF -elif [[ "$distro_or_config" != *.yaml ]]; then - add_to_container << EOF -ENTRYPOINT ["llama", "stack", "run", "$distro_or_config"] -EOF -fi - -# Add other require item commands genearic to all containers -add_to_container << EOF - -RUN mkdir -p /.llama /.cache && chmod -R g+rw /.llama /.cache && (chmod -R g+rw /app 2>/dev/null || true) -EOF - -printf "Containerfile created successfully in %s/Containerfile\n\n" "$TEMP_DIR" -cat "$TEMP_DIR"/Containerfile 
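The deleted build_container.sh assembles a Containerfile by appending heredoc fragments through `add_to_container` and only invokes the container build once the file is complete, as the `cat` of the finished Containerfile above shows. A rough Python analogue of that accumulate-then-write pattern (paths and fragment contents are illustrative):

```python
import tempfile
from pathlib import Path

containerfile = Path(tempfile.mkdtemp()) / "Containerfile"

def add_to_container(fragment: str) -> None:
    # Append one fragment, as the shell function did with heredocs.
    with containerfile.open("a") as f:
        f.write(fragment.rstrip("\n") + "\n")

add_to_container("FROM python:3.12-slim\nWORKDIR /app")
add_to_container("ENV UV_SYSTEM_PYTHON=1\nRUN pip install uv")
print(containerfile.read_text())  # inspect before handing off to docker/podman build
```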
-printf "\n" - -# Start building the CLI arguments -CLI_ARGS=() - -# Read CONTAINER_OPTS and put it in an array -read -ra CLI_ARGS <<< "$CONTAINER_OPTS" - -if [ "$USE_COPY_NOT_MOUNT" != "true" ]; then - if [ -n "$LLAMA_STACK_DIR" ]; then - CLI_ARGS+=("-v" "$(readlink -f "$LLAMA_STACK_DIR"):$stack_mount") - fi - if [ -n "$LLAMA_STACK_CLIENT_DIR" ]; then - CLI_ARGS+=("-v" "$(readlink -f "$LLAMA_STACK_CLIENT_DIR"):$client_mount") - fi -fi - -if is_command_available selinuxenabled && selinuxenabled; then - # Disable SELinux labels -- we don't want to relabel the llama-stack source dir - CLI_ARGS+=("--security-opt" "label=disable") -fi - -# Set version tag based on PyPI version -if [ -n "$PYPI_VERSION" ]; then - version_tag="$PYPI_VERSION" -elif [ -n "$TEST_PYPI_VERSION" ]; then - version_tag="test-$TEST_PYPI_VERSION" -elif [[ -n "$LLAMA_STACK_DIR" || -n "$LLAMA_STACK_CLIENT_DIR" ]]; then - version_tag="dev" -else - URL="https://pypi.org/pypi/llama-stack/json" - version_tag=$(curl -s $URL | jq -r '.info.version') -fi - -# Add version tag to image name -image_tag="$image_name:$version_tag" - -# Detect platform architecture -ARCH=$(uname -m) -if [ -n "$BUILD_PLATFORM" ]; then - CLI_ARGS+=("--platform" "$BUILD_PLATFORM") -elif [ "$ARCH" = "arm64" ] || [ "$ARCH" = "aarch64" ]; then - CLI_ARGS+=("--platform" "linux/arm64") -elif [ "$ARCH" = "x86_64" ]; then - CLI_ARGS+=("--platform" "linux/amd64") -else - echo "Unsupported architecture: $ARCH" - exit 1 -fi - -echo "PWD: $(pwd)" -echo "Containerfile: $TEMP_DIR/Containerfile" -set -x - -$CONTAINER_BINARY build \ - "${CLI_ARGS[@]}" \ - -t "$image_tag" \ - -f "$TEMP_DIR/Containerfile" \ - "$BUILD_CONTEXT_DIR" - -# clean up tmp/configs -rm -rf "$BUILD_CONTEXT_DIR/run.yaml" "$TEMP_DIR" -set +x - -echo "Success!" diff --git a/llama_stack/core/build_venv.sh b/llama_stack/core/build_venv.sh deleted file mode 100755 index 04927d71e..000000000 --- a/llama_stack/core/build_venv.sh +++ /dev/null @@ -1,220 +0,0 @@ -#!/bin/bash - -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
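Just above, the deleted script derives a `--platform` flag from `uname -m` and falls back to the PyPI JSON endpoint for a version tag when no local sources are mounted. A compact sketch of those two mappings (the URL mirrors the script's curl | jq call; treat the rest as illustrative):

```python
import json
import platform
import urllib.request

ARCH_TO_PLATFORM = {"arm64": "linux/arm64", "aarch64": "linux/arm64", "x86_64": "linux/amd64"}

def build_platform() -> str:
    arch = platform.machine()
    try:
        return ARCH_TO_PLATFORM[arch]
    except KeyError:
        raise SystemExit(f"Unsupported architecture: {arch}")

def pypi_version_tag() -> str:
    # Same endpoint the script queried: curl -s $URL | jq -r '.info.version'
    with urllib.request.urlopen("https://pypi.org/pypi/llama-stack/json") as resp:
        return json.load(resp)["info"]["version"]

print(build_platform())
```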
- -LLAMA_STACK_DIR=${LLAMA_STACK_DIR:-} -LLAMA_STACK_CLIENT_DIR=${LLAMA_STACK_CLIENT_DIR:-} -TEST_PYPI_VERSION=${TEST_PYPI_VERSION:-} -# This timeout (in seconds) is necessary when installing PyTorch via uv since it's likely to time out -# Reference: https://github.com/astral-sh/uv/pull/1694 -UV_HTTP_TIMEOUT=${UV_HTTP_TIMEOUT:-500} -UV_SYSTEM_PYTHON=${UV_SYSTEM_PYTHON:-} -VIRTUAL_ENV=${VIRTUAL_ENV:-} - -set -euo pipefail - -# Define color codes -RED='\033[0;31m' -NC='\033[0m' # No Color - -SCRIPT_DIR=$(dirname "$(readlink -f "$0")") -source "$SCRIPT_DIR/common.sh" - -# Usage function -usage() { - echo "Usage: $0 --env-name <env_name> --normal-deps <pip_dependencies> [--external-provider-deps <external_provider_deps>] [--optional-deps <special_pip_deps>]" - echo "Example: $0 --env-name mybuild --normal-deps 'numpy pandas scipy' --external-provider-deps 'foo' --optional-deps 'bar'" - exit 1 -} - -# Parse arguments -env_name="" -normal_deps="" -external_provider_deps="" -optional_deps="" - -while [[ $# -gt 0 ]]; do - key="$1" - case "$key" in - --env-name) - if [[ -z "$2" || "$2" == --* ]]; then - echo "Error: --env-name requires a string value" >&2 - usage - fi - env_name="$2" - shift 2 - ;; - --normal-deps) - if [[ -z "$2" || "$2" == --* ]]; then - echo "Error: --normal-deps requires a string value" >&2 - usage - fi - normal_deps="$2" - shift 2 - ;; - --external-provider-deps) - if [[ -z "$2" || "$2" == --* ]]; then - echo "Error: --external-provider-deps requires a string value" >&2 - usage - fi - external_provider_deps="$2" - shift 2 - ;; - --optional-deps) - if [[ -z "$2" || "$2" == --* ]]; then - echo "Error: --optional-deps requires a string value" >&2 - usage - fi - optional_deps="$2" - shift 2 - ;; - *) - echo "Unknown option: $1" >&2 - usage - ;; - esac -done - -# Check required arguments -if [[ -z "$env_name" || -z "$normal_deps" ]]; then - echo "Error: --env-name and --normal-deps are required." >&2 - usage -fi - -if [ -n "$LLAMA_STACK_DIR" ]; then - echo "Using llama-stack-dir=$LLAMA_STACK_DIR" -fi -if [ -n "$LLAMA_STACK_CLIENT_DIR" ]; then - echo "Using llama-stack-client-dir=$LLAMA_STACK_CLIENT_DIR" -fi - -ENVNAME="" - -# pre-run checks to make sure we can proceed with the installation -pre_run_checks() { - local env_name="$1" - - if ! is_command_available uv; then - echo "uv is not installed, trying to install it." - if ! is_command_available pip; then - echo "pip is not installed, cannot automatically install 'uv'." - echo "Follow this link to install it:" - echo "https://docs.astral.sh/uv/getting-started/installation/" - exit 1 - else - pip install uv - fi - fi - - # checking if an environment with the same name already exists - if [ -d "$env_name" ]; then - echo "Environment '$env_name' already exists, re-using it." 
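The `pre_run_checks` above prefers an existing uv and only bootstraps it via pip as a fallback. The same detection in Python, as a minimal sketch:

```python
import shutil
import subprocess

def ensure_uv() -> None:
    # Mirrors pre_run_checks: use uv if present, otherwise bootstrap via pip.
    if shutil.which("uv"):
        return
    if shutil.which("pip") is None:
        raise SystemExit("pip is not installed, cannot automatically install 'uv'.")
    subprocess.run(["pip", "install", "uv"], check=True)

ensure_uv()
```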
- fi -} - -run() { - # Use only global variables set by flag parser - if [ -n "$UV_SYSTEM_PYTHON" ] || [ "$env_name" == "__system__" ]; then - echo "Installing dependencies in system Python environment" - export UV_SYSTEM_PYTHON=1 - elif [ "$VIRTUAL_ENV" == "$env_name" ]; then - echo "Virtual environment $env_name is already active" - else - echo "Using virtual environment $env_name" - uv venv "$env_name" - source "$env_name/bin/activate" - fi - - if [ -n "$TEST_PYPI_VERSION" ]; then - uv pip install fastapi libcst - uv pip install --extra-index-url https://test.pypi.org/simple/ \ - --index-strategy unsafe-best-match \ - llama-stack=="$TEST_PYPI_VERSION" \ - $normal_deps - if [ -n "$optional_deps" ]; then - IFS='#' read -ra parts <<<"$optional_deps" - for part in "${parts[@]}"; do - echo "$part" - uv pip install $part - done - fi - if [ -n "$external_provider_deps" ]; then - IFS='#' read -ra parts <<<"$external_provider_deps" - for part in "${parts[@]}"; do - echo "$part" - uv pip install "$part" - done - fi - else - if [ -n "$LLAMA_STACK_DIR" ]; then - # only warn if DIR does not start with "git+" - if [ ! -d "$LLAMA_STACK_DIR" ] && [[ "$LLAMA_STACK_DIR" != git+* ]]; then - printf "${RED}Warning: LLAMA_STACK_DIR is set but directory does not exist: %s${NC}\n" "$LLAMA_STACK_DIR" >&2 - exit 1 - fi - printf "Installing from LLAMA_STACK_DIR: %s\n" "$LLAMA_STACK_DIR" - # editable only if LLAMA_STACK_DIR does not start with "git+" - if [[ "$LLAMA_STACK_DIR" != git+* ]]; then - EDITABLE="-e" - else - EDITABLE="" - fi - uv pip install --no-cache-dir $EDITABLE "$LLAMA_STACK_DIR" - else - uv pip install --no-cache-dir llama-stack - fi - - if [ -n "$LLAMA_STACK_CLIENT_DIR" ]; then - # only warn if DIR does not start with "git+" - if [ ! -d "$LLAMA_STACK_CLIENT_DIR" ] && [[ "$LLAMA_STACK_CLIENT_DIR" != git+* ]]; then - printf "${RED}Warning: LLAMA_STACK_CLIENT_DIR is set but directory does not exist: %s${NC}\n" "$LLAMA_STACK_CLIENT_DIR" >&2 - exit 1 - fi - printf "Installing from LLAMA_STACK_CLIENT_DIR: %s\n" "$LLAMA_STACK_CLIENT_DIR" - # editable only if LLAMA_STACK_CLIENT_DIR does not start with "git+" - if [[ "$LLAMA_STACK_CLIENT_DIR" != git+* ]]; then - EDITABLE="-e" - else - EDITABLE="" - fi - uv pip install --no-cache-dir $EDITABLE "$LLAMA_STACK_CLIENT_DIR" - fi - - printf "Installing pip dependencies\n" - uv pip install $normal_deps - if [ -n "$optional_deps" ]; then - IFS='#' read -ra parts <<<"$optional_deps" - for part in "${parts[@]}"; do - echo "Installing special provider module: $part" - uv pip install $part - done - fi - if [ -n "$external_provider_deps" ]; then - IFS='#' read -ra parts <<<"$external_provider_deps" - for part in "${parts[@]}"; do - echo "Installing external provider module: $part" - uv pip install "$part" - echo "Getting provider spec for module: $part and installing dependencies" - package_name=$(echo "$part" | sed 's/[<>=!].*//') - python3 -c " -import importlib -import sys -try: - module = importlib.import_module(f'$package_name.provider') - spec = module.get_provider_spec() - if hasattr(spec, 'pip_packages') and spec.pip_packages: - print('\\n'.join(spec.pip_packages)) -except Exception as e: - print(f'Error getting provider spec for $package_name: {e}', file=sys.stderr) -" | uv pip install -r - - done - fi - fi -} - -pre_run_checks "$env_name" -run diff --git a/llama_stack/core/configure.py b/llama_stack/core/configure.py index bfa2c6d71..734839ea9 100644 --- a/llama_stack/core/configure.py +++ b/llama_stack/core/configure.py @@ -159,6 +159,37 @@ def 
upgrade_from_routing_table( config_dict["apis"] = config_dict["apis_to_serve"] config_dict.pop("apis_to_serve", None) + # Add default storage config if not present + if "storage" not in config_dict: + config_dict["storage"] = { + "backends": { + "kv_default": { + "type": "kv_sqlite", + "db_path": "~/.llama/kvstore.db", + }, + "sql_default": { + "type": "sql_sqlite", + "db_path": "~/.llama/sql_store.db", + }, + }, + "stores": { + "metadata": { + "namespace": "registry", + "backend": "kv_default", + }, + "inference": { + "table_name": "inference_store", + "backend": "sql_default", + "max_write_queue_size": 10000, + "num_writers": 4, + }, + "conversations": { + "table_name": "openai_conversations", + "backend": "sql_default", + }, + }, + } + return config_dict diff --git a/llama_stack/core/conversations/conversations.py b/llama_stack/core/conversations/conversations.py index d2537c7ee..66880ca36 100644 --- a/llama_stack/core/conversations/conversations.py +++ b/llama_stack/core/conversations/conversations.py @@ -4,7 +4,6 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -import os import secrets import time from typing import Any @@ -21,16 +20,11 @@ from llama_stack.apis.conversations.conversations import ( Conversations, Metadata, ) -from llama_stack.core.datatypes import AccessRule -from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR +from llama_stack.core.datatypes import AccessRule, StackRunConfig from llama_stack.log import get_logger from llama_stack.providers.utils.sqlstore.api import ColumnDefinition, ColumnType from llama_stack.providers.utils.sqlstore.authorized_sqlstore import AuthorizedSqlStore -from llama_stack.providers.utils.sqlstore.sqlstore import ( - SqliteSqlStoreConfig, - SqlStoreConfig, - sqlstore_impl, -) +from llama_stack.providers.utils.sqlstore.sqlstore import sqlstore_impl logger = get_logger(name=__name__, category="openai_conversations") @@ -38,13 +32,11 @@ logger = get_logger(name=__name__, category="openai_conversations") class ConversationServiceConfig(BaseModel): """Configuration for the built-in conversation service. 
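The configure.py hunk above backfills a `storage` section into run configs written before this change. The same guard-and-default move in isolation, as a sketch (dict contents copied from the hunk):

```python
def ensure_storage_defaults(config_dict: dict) -> dict:
    # Backfill the storage section for pre-migration configs, matching
    # the defaults added in upgrade_from_routing_table above.
    config_dict.setdefault("storage", {
        "backends": {
            "kv_default": {"type": "kv_sqlite", "db_path": "~/.llama/kvstore.db"},
            "sql_default": {"type": "sql_sqlite", "db_path": "~/.llama/sql_store.db"},
        },
        "stores": {
            "metadata": {"namespace": "registry", "backend": "kv_default"},
            "inference": {"table_name": "inference_store", "backend": "sql_default"},
            "conversations": {"table_name": "openai_conversations", "backend": "sql_default"},
        },
    })
    return config_dict

print(ensure_storage_defaults({})["storage"]["stores"]["metadata"])
```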
- :param conversations_store: SQL store configuration for conversations (defaults to SQLite) + :param run_config: Stack run configuration for resolving persistence :param policy: Access control rules """ - conversations_store: SqlStoreConfig = SqliteSqlStoreConfig( - db_path=(DISTRIBS_BASE_DIR / "conversations.db").as_posix() - ) + run_config: StackRunConfig policy: list[AccessRule] = [] @@ -63,14 +55,16 @@ class ConversationServiceImpl(Conversations): self.deps = deps self.policy = config.policy - base_sql_store = sqlstore_impl(config.conversations_store) + # Use conversations store reference from run config + conversations_ref = config.run_config.storage.stores.conversations + if not conversations_ref: + raise ValueError("storage.stores.conversations must be configured in run config") + + base_sql_store = sqlstore_impl(conversations_ref) self.sql_store = AuthorizedSqlStore(base_sql_store, self.policy) async def initialize(self) -> None: """Initialize the store and create tables.""" - if isinstance(self.config.conversations_store, SqliteSqlStoreConfig): - os.makedirs(os.path.dirname(self.config.conversations_store.db_path), exist_ok=True) - await self.sql_store.create_table( "openai_conversations", { diff --git a/llama_stack/core/datatypes.py b/llama_stack/core/datatypes.py index 94222d49e..5f4775d87 100644 --- a/llama_stack/core/datatypes.py +++ b/llama_stack/core/datatypes.py @@ -23,12 +23,15 @@ from llama_stack.apis.scoring import Scoring from llama_stack.apis.scoring_functions import ScoringFn, ScoringFnInput from llama_stack.apis.shields import Shield, ShieldInput from llama_stack.apis.tools import ToolGroup, ToolGroupInput, ToolRuntime -from llama_stack.apis.vector_dbs import VectorDB, VectorDBInput from llama_stack.apis.vector_io import VectorIO +from llama_stack.apis.vector_stores import VectorStore, VectorStoreInput from llama_stack.core.access_control.datatypes import AccessRule +from llama_stack.core.storage.datatypes import ( + KVStoreReference, + StorageBackendType, + StorageConfig, +) from llama_stack.providers.datatypes import Api, ProviderSpec -from llama_stack.providers.utils.kvstore.config import KVStoreConfig, SqliteKVStoreConfig -from llama_stack.providers.utils.sqlstore.sqlstore import SqlStoreConfig LLAMA_STACK_BUILD_CONFIG_VERSION = 2 LLAMA_STACK_RUN_CONFIG_VERSION = 2 @@ -68,7 +71,7 @@ class ShieldWithOwner(Shield, ResourceWithOwner): pass -class VectorDBWithOwner(VectorDB, ResourceWithOwner): +class VectorStoreWithOwner(VectorStore, ResourceWithOwner): pass @@ -88,12 +91,12 @@ class ToolGroupWithOwner(ToolGroup, ResourceWithOwner): pass -RoutableObject = Model | Shield | VectorDB | Dataset | ScoringFn | Benchmark | ToolGroup +RoutableObject = Model | Shield | VectorStore | Dataset | ScoringFn | Benchmark | ToolGroup RoutableObjectWithProvider = Annotated[ ModelWithOwner | ShieldWithOwner - | VectorDBWithOwner + | VectorStoreWithOwner | DatasetWithOwner | ScoringFnWithOwner | BenchmarkWithOwner @@ -351,12 +354,32 @@ class AuthenticationRequiredError(Exception): pass +class QualifiedModel(BaseModel): + """A qualified model identifier, consisting of a provider ID and a model ID.""" + + provider_id: str + model_id: str + + +class VectorStoresConfig(BaseModel): + """Configuration for vector stores in the stack.""" + + default_provider_id: str | None = Field( + default=None, + description="ID of the vector_io provider to use as default when multiple providers are available and none is specified.", + ) + default_embedding_model: QualifiedModel | None = Field( + 
default=None, + description="Default embedding model configuration for vector stores.", + ) + + class QuotaPeriod(StrEnum): DAY = "day" class QuotaConfig(BaseModel): - kvstore: SqliteKVStoreConfig = Field(description="Config for KV store backend (SQLite only for now)") + kvstore: KVStoreReference = Field(description="Config for KV store backend (SQLite only for now)") anonymous_max_requests: int = Field(default=100, description="Max requests for unauthenticated clients per period") authenticated_max_requests: int = Field( default=1000, description="Max requests for authenticated clients per period" @@ -399,6 +422,18 @@ def process_cors_config(cors_config: bool | CORSConfig | None) -> CORSConfig | N raise ValueError(f"Expected bool or CORSConfig, got {type(cors_config).__name__}") +class RegisteredResources(BaseModel): + """Registry of resources available in the distribution.""" + + models: list[ModelInput] = Field(default_factory=list) + shields: list[ShieldInput] = Field(default_factory=list) + vector_stores: list[VectorStoreInput] = Field(default_factory=list) + datasets: list[DatasetInput] = Field(default_factory=list) + scoring_fns: list[ScoringFnInput] = Field(default_factory=list) + benchmarks: list[BenchmarkInput] = Field(default_factory=list) + tool_groups: list[ToolGroupInput] = Field(default_factory=list) + + class ServerConfig(BaseModel): port: int = Field( default=8321, @@ -438,18 +473,6 @@ class ServerConfig(BaseModel): ) -class InferenceStoreConfig(BaseModel): - sql_store_config: SqlStoreConfig - max_write_queue_size: int = Field(default=10000, description="Max queued writes for inference store") - num_writers: int = Field(default=4, description="Number of concurrent background writers") - - -class ResponsesStoreConfig(BaseModel): - sql_store_config: SqlStoreConfig - max_write_queue_size: int = Field(default=10000, description="Max queued writes for responses store") - num_writers: int = Field(default=4, description="Number of concurrent background writers") - - class StackRunConfig(BaseModel): version: int = LLAMA_STACK_RUN_CONFIG_VERSION @@ -476,37 +499,15 @@ One or more providers to use for each API. The same provider_type (e.g., meta-re can be instantiated multiple times (with different configs) if necessary. """, ) - metadata_store: KVStoreConfig | None = Field( - default=None, - description=""" -Configuration for the persistence store used by the distribution registry. If not specified, -a default SQLite store will be used.""", + storage: StorageConfig = Field( + description="Catalog of named storage backends and references available to the stack", ) - inference_store: InferenceStoreConfig | SqlStoreConfig | None = Field( - default=None, - description=""" -Configuration for the persistence store used by the inference API. Can be either a -InferenceStoreConfig (with queue tuning parameters) or a SqlStoreConfig (deprecated). -If not specified, a default SQLite store will be used.""", + registered_resources: RegisteredResources = Field( + default_factory=RegisteredResources, + description="Registry of resources available in the distribution", ) - conversations_store: SqlStoreConfig | None = Field( - default=None, - description=""" -Configuration for the persistence store used by the conversations API. 
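The new `VectorStoresConfig` above lets a distribution pin both a default vector_io provider and a default embedding model. A small sketch of instantiating it (assumes this PR's llama_stack package is installed; the provider and model identifiers are illustrative):

```python
from llama_stack.core.datatypes import QualifiedModel, VectorStoresConfig

vector_stores = VectorStoresConfig(
    default_provider_id="faiss",  # illustrative provider ID
    default_embedding_model=QualifiedModel(
        provider_id="sentence-transformers",
        model_id="nomic-ai/nomic-embed-text-v1.5",
    ),
)
# The router joins the two parts when resolving the model, as in vector_io.py:
m = vector_stores.default_embedding_model
print(f"{m.provider_id}/{m.model_id}")
```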
-If not specified, a default SQLite store will be used.""", - ) - - # registry of "resources" in the distribution - models: list[ModelInput] = Field(default_factory=list) - shields: list[ShieldInput] = Field(default_factory=list) - vector_dbs: list[VectorDBInput] = Field(default_factory=list) - datasets: list[DatasetInput] = Field(default_factory=list) - scoring_fns: list[ScoringFnInput] = Field(default_factory=list) - benchmarks: list[BenchmarkInput] = Field(default_factory=list) - tool_groups: list[ToolGroupInput] = Field(default_factory=list) - logging: LoggingConfig | None = Field(default=None, description="Configuration for Llama Stack Logging") telemetry: TelemetryConfig = Field(default_factory=TelemetryConfig, description="Configuration for telemetry") @@ -526,6 +527,11 @@ If not specified, a default SQLite store will be used.""", description="Path to directory containing external API implementations. The APIs code and dependencies must be installed on the system.", ) + vector_stores: VectorStoresConfig | None = Field( + default=None, + description="Configuration for vector stores, including default embedding model", + ) + @field_validator("external_providers_dir") @classmethod def validate_external_providers_dir(cls, v): @@ -535,6 +541,49 @@ If not specified, a default SQLite store will be used.""", return Path(v) return v + @model_validator(mode="after") + def validate_server_stores(self) -> "StackRunConfig": + backend_map = self.storage.backends + stores = self.storage.stores + kv_backends = { + name + for name, cfg in backend_map.items() + if cfg.type + in { + StorageBackendType.KV_REDIS, + StorageBackendType.KV_SQLITE, + StorageBackendType.KV_POSTGRES, + StorageBackendType.KV_MONGODB, + } + } + sql_backends = { + name + for name, cfg in backend_map.items() + if cfg.type in {StorageBackendType.SQL_SQLITE, StorageBackendType.SQL_POSTGRES} + } + + def _ensure_backend(reference, expected_set, store_name: str) -> None: + if reference is None: + return + backend_name = reference.backend + if backend_name not in backend_map: + raise ValueError( + f"{store_name} references unknown backend '{backend_name}'. " + f"Available backends: {sorted(backend_map)}" + ) + if backend_name not in expected_set: + raise ValueError( + f"{store_name} references backend '{backend_name}' of type " + f"'{backend_map[backend_name].type.value}', but a backend of type " + f"{'kv_*' if expected_set is kv_backends else 'sql_*'} is required." 
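The `validate_server_stores` validator being added here rejects store references that point at an undeclared backend or at a backend of the wrong family (kv_* vs sql_*). The rule, distilled into a standalone sketch:

```python
# Distilled from validate_server_stores: every store reference must name a
# declared backend, and that backend must be of the expected family.
backends = {"kv_default": "kv_sqlite", "sql_default": "sql_sqlite"}
kv_backends = {n for n, t in backends.items() if t.startswith("kv_")}
sql_backends = {n for n, t in backends.items() if t.startswith("sql_")}

def ensure_backend(backend_name: str, expected: set[str], store_name: str) -> None:
    if backend_name not in backends:
        raise ValueError(f"{store_name} references unknown backend '{backend_name}'")
    if backend_name not in expected:
        raise ValueError(f"{store_name} references a backend of the wrong type")

ensure_backend("kv_default", kv_backends, "storage.stores.metadata")
ensure_backend("sql_default", sql_backends, "storage.stores.inference")
```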
+ ) + + _ensure_backend(stores.metadata, kv_backends, "storage.stores.metadata") + _ensure_backend(stores.inference, sql_backends, "storage.stores.inference") + _ensure_backend(stores.conversations, sql_backends, "storage.stores.conversations") + _ensure_backend(stores.responses, sql_backends, "storage.stores.responses") + return self + class BuildConfig(BaseModel): version: int = LLAMA_STACK_BUILD_CONFIG_VERSION diff --git a/llama_stack/core/distribution.py b/llama_stack/core/distribution.py index 0e1f672c3..82cbcf984 100644 --- a/llama_stack/core/distribution.py +++ b/llama_stack/core/distribution.py @@ -63,6 +63,10 @@ def builtin_automatically_routed_apis() -> list[AutoRoutedApiInfo]: routing_table_api=Api.tool_groups, router_api=Api.tool_runtime, ), + AutoRoutedApiInfo( + routing_table_api=Api.vector_stores, + router_api=Api.vector_io, + ), ] diff --git a/llama_stack/core/library_client.py b/llama_stack/core/library_client.py index 1179075cd..328ca9c6e 100644 --- a/llama_stack/core/library_client.py +++ b/llama_stack/core/library_client.py @@ -278,7 +278,7 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient): else: prefix = "!" if in_notebook() else "" cprint( - f"Please run:\n\n{prefix}llama stack build --distro {self.config_path_or_distro_name} --image-type venv\n\n", + f"Please run:\n\n{prefix}llama stack list-deps {self.config_path_or_distro_name} | xargs -L1 uv pip install\n\n", "yellow", file=sys.stderr, ) diff --git a/llama_stack/core/prompts/prompts.py b/llama_stack/core/prompts/prompts.py index 26e8f5cef..856397ca5 100644 --- a/llama_stack/core/prompts/prompts.py +++ b/llama_stack/core/prompts/prompts.py @@ -11,9 +11,8 @@ from pydantic import BaseModel from llama_stack.apis.prompts import ListPromptsResponse, Prompt, Prompts from llama_stack.core.datatypes import StackRunConfig -from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR +from llama_stack.core.storage.datatypes import KVStoreReference from llama_stack.providers.utils.kvstore import KVStore, kvstore_impl -from llama_stack.providers.utils.kvstore.config import SqliteKVStoreConfig class PromptServiceConfig(BaseModel): @@ -41,10 +40,12 @@ class PromptServiceImpl(Prompts): self.kvstore: KVStore async def initialize(self) -> None: - kvstore_config = SqliteKVStoreConfig( - db_path=(DISTRIBS_BASE_DIR / self.config.run_config.image_name / "prompts.db").as_posix() - ) - self.kvstore = await kvstore_impl(kvstore_config) + # Use metadata store backend with prompts-specific namespace + metadata_ref = self.config.run_config.storage.stores.metadata + if not metadata_ref: + raise ValueError("storage.stores.metadata must be configured in run config") + prompts_ref = KVStoreReference(namespace="prompts", backend=metadata_ref.backend) + self.kvstore = await kvstore_impl(prompts_ref) def _get_default_key(self, prompt_id: str) -> str: """Get the KVStore key that stores the default version number.""" diff --git a/llama_stack/core/resolver.py b/llama_stack/core/resolver.py index acd459f99..0b63815ea 100644 --- a/llama_stack/core/resolver.py +++ b/llama_stack/core/resolver.py @@ -30,6 +30,7 @@ from llama_stack.apis.shields import Shields from llama_stack.apis.telemetry import Telemetry from llama_stack.apis.tools import ToolGroups, ToolRuntime from llama_stack.apis.vector_io import VectorIO +from llama_stack.apis.vector_stores import VectorStore from llama_stack.apis.version import LLAMA_STACK_API_V1ALPHA from llama_stack.core.client import get_client_impl from llama_stack.core.datatypes import ( @@ -81,6 +82,7 @@ def 
api_protocol_map(external_apis: dict[Api, ExternalApiSpec] | None = None) -> Api.inspect: Inspect, Api.batches: Batches, Api.vector_io: VectorIO, + Api.vector_stores: VectorStore, Api.models: Models, Api.safety: Safety, Api.shields: Shields, diff --git a/llama_stack/core/routers/__init__.py b/llama_stack/core/routers/__init__.py index 4463d2460..20c17e59d 100644 --- a/llama_stack/core/routers/__init__.py +++ b/llama_stack/core/routers/__init__.py @@ -6,7 +6,10 @@ from typing import Any -from llama_stack.core.datatypes import AccessRule, RoutedProtocol +from llama_stack.core.datatypes import ( + AccessRule, + RoutedProtocol, +) from llama_stack.core.stack import StackRunConfig from llama_stack.core.store import DistributionRegistry from llama_stack.providers.datatypes import Api, RoutingTable @@ -26,6 +29,7 @@ async def get_routing_table_impl( from ..routing_tables.scoring_functions import ScoringFunctionsRoutingTable from ..routing_tables.shields import ShieldsRoutingTable from ..routing_tables.toolgroups import ToolGroupsRoutingTable + from ..routing_tables.vector_stores import VectorStoresRoutingTable api_to_tables = { "models": ModelsRoutingTable, @@ -34,6 +38,7 @@ async def get_routing_table_impl( "scoring_functions": ScoringFunctionsRoutingTable, "benchmarks": BenchmarksRoutingTable, "tool_groups": ToolGroupsRoutingTable, + "vector_stores": VectorStoresRoutingTable, } if api.value not in api_to_tables: @@ -76,14 +81,21 @@ async def get_auto_router_impl( api_to_dep_impl[dep_name] = deps[dep_api] # TODO: move pass configs to routers instead - if api == Api.inference and run_config.inference_store: + if api == Api.inference: + inference_ref = run_config.storage.stores.inference + if not inference_ref: + raise ValueError("storage.stores.inference must be configured in run config") + inference_store = InferenceStore( - config=run_config.inference_store, + reference=inference_ref, policy=policy, ) await inference_store.initialize() api_to_dep_impl["store"] = inference_store + elif api == Api.vector_io: + api_to_dep_impl["vector_stores_config"] = run_config.vector_stores + impl = api_to_routers[api.value](routing_table, **api_to_dep_impl) await impl.initialize() return impl diff --git a/llama_stack/core/routers/tool_runtime.py b/llama_stack/core/routers/tool_runtime.py index ad82293e5..7c5bb25c6 100644 --- a/llama_stack/core/routers/tool_runtime.py +++ b/llama_stack/core/routers/tool_runtime.py @@ -8,16 +8,8 @@ from typing import Any from llama_stack.apis.common.content_types import ( URL, - InterleavedContent, -) -from llama_stack.apis.tools import ( - ListToolDefsResponse, - RAGDocument, - RAGQueryConfig, - RAGQueryResult, - RAGToolRuntime, - ToolRuntime, ) +from llama_stack.apis.tools import ListToolDefsResponse, ToolRuntime from llama_stack.log import get_logger from ..routing_tables.toolgroups import ToolGroupsRoutingTable @@ -26,36 +18,6 @@ logger = get_logger(name=__name__, category="core::routers") class ToolRuntimeRouter(ToolRuntime): - class RagToolImpl(RAGToolRuntime): - def __init__( - self, - routing_table: ToolGroupsRoutingTable, - ) -> None: - logger.debug("Initializing ToolRuntimeRouter.RagToolImpl") - self.routing_table = routing_table - - async def query( - self, - content: InterleavedContent, - vector_db_ids: list[str], - query_config: RAGQueryConfig | None = None, - ) -> RAGQueryResult: - logger.debug(f"ToolRuntimeRouter.RagToolImpl.query: {vector_db_ids}") - provider = await self.routing_table.get_provider_impl("knowledge_search") - return await provider.query(content, 
vector_db_ids, query_config) - - async def insert( - self, - documents: list[RAGDocument], - vector_db_id: str, - chunk_size_in_tokens: int = 512, - ) -> None: - logger.debug( - f"ToolRuntimeRouter.RagToolImpl.insert: {vector_db_id}, {len(documents)} documents, chunk_size={chunk_size_in_tokens}" - ) - provider = await self.routing_table.get_provider_impl("insert_into_memory") - return await provider.insert(documents, vector_db_id, chunk_size_in_tokens) - def __init__( self, routing_table: ToolGroupsRoutingTable, @@ -63,11 +25,6 @@ class ToolRuntimeRouter(ToolRuntime): logger.debug("Initializing ToolRuntimeRouter") self.routing_table = routing_table - # HACK ALERT this should be in sync with "get_all_api_endpoints()" - self.rag_tool = self.RagToolImpl(routing_table) - for method in ("query", "insert"): - setattr(self, f"rag_tool.{method}", getattr(self.rag_tool, method)) - async def initialize(self) -> None: logger.debug("ToolRuntimeRouter.initialize") pass diff --git a/llama_stack/core/routers/vector_io.py b/llama_stack/core/routers/vector_io.py index f4e871a40..2b1701dc2 100644 --- a/llama_stack/core/routers/vector_io.py +++ b/llama_stack/core/routers/vector_io.py @@ -31,6 +31,7 @@ from llama_stack.apis.vector_io import ( VectorStoreObject, VectorStoreSearchResponsePage, ) +from llama_stack.core.datatypes import VectorStoresConfig from llama_stack.log import get_logger from llama_stack.providers.datatypes import HealthResponse, HealthStatus, RoutingTable @@ -43,9 +44,11 @@ class VectorIORouter(VectorIO): def __init__( self, routing_table: RoutingTable, + vector_stores_config: VectorStoresConfig | None = None, ) -> None: logger.debug("Initializing VectorIORouter") self.routing_table = routing_table + self.vector_stores_config = vector_stores_config async def initialize(self) -> None: logger.debug("VectorIORouter.initialize") @@ -68,25 +71,6 @@ class VectorIORouter(VectorIO): raise ValueError(f"Embedding model '{embedding_model_id}' not found or not an embedding model") - async def register_vector_db( - self, - vector_db_id: str, - embedding_model: str, - embedding_dimension: int | None = 384, - provider_id: str | None = None, - vector_db_name: str | None = None, - provider_vector_db_id: str | None = None, - ) -> None: - logger.debug(f"VectorIORouter.register_vector_db: {vector_db_id}, {embedding_model}") - await self.routing_table.register_vector_db( - vector_db_id, - embedding_model, - embedding_dimension, - provider_id, - vector_db_name, - provider_vector_db_id, - ) - async def insert_chunks( self, vector_db_id: str, @@ -122,6 +106,17 @@ class VectorIORouter(VectorIO): embedding_dimension = extra.get("embedding_dimension") provider_id = extra.get("provider_id") + # Use default embedding model if not specified + if ( + embedding_model is None + and self.vector_stores_config + and self.vector_stores_config.default_embedding_model is not None + ): + # Construct the full model ID with provider prefix + embedding_provider_id = self.vector_stores_config.default_embedding_model.provider_id + model_id = self.vector_stores_config.default_embedding_model.model_id + embedding_model = f"{embedding_provider_id}/{model_id}" + if embedding_model is not None and embedding_dimension is None: embedding_dimension = await self._get_embedding_model_dimension(embedding_model) @@ -132,28 +127,41 @@ class VectorIORouter(VectorIO): raise ValueError("No vector_io providers available") if num_providers > 1: available_providers = list(self.routing_table.impls_by_provider_id.keys()) - raise ValueError( - f"Multiple 
vector_io providers available. Please specify provider_id in extra_body. " - f"Available providers: {available_providers}" - ) - provider_id = list(self.routing_table.impls_by_provider_id.keys())[0] + # Use default configured provider + if self.vector_stores_config and self.vector_stores_config.default_provider_id: + default_provider = self.vector_stores_config.default_provider_id + if default_provider in available_providers: + provider_id = default_provider + logger.debug(f"Using configured default vector store provider: {provider_id}") + else: + raise ValueError( + f"Configured default vector store provider '{default_provider}' not found. " + f"Available providers: {available_providers}" + ) + else: + raise ValueError( + f"Multiple vector_io providers available. Please specify provider_id in extra_body. " + f"Available providers: {available_providers}" + ) + else: + provider_id = list(self.routing_table.impls_by_provider_id.keys())[0] - vector_db_id = f"vs_{uuid.uuid4()}" - registered_vector_db = await self.routing_table.register_vector_db( - vector_db_id=vector_db_id, + vector_store_id = f"vs_{uuid.uuid4()}" + registered_vector_store = await self.routing_table.register_vector_store( + vector_store_id=vector_store_id, embedding_model=embedding_model, embedding_dimension=embedding_dimension, provider_id=provider_id, - provider_vector_db_id=vector_db_id, - vector_db_name=params.name, + provider_vector_store_id=vector_store_id, + vector_store_name=params.name, ) - provider = await self.routing_table.get_provider_impl(registered_vector_db.identifier) + provider = await self.routing_table.get_provider_impl(registered_vector_store.identifier) - # Update model_extra with registered values so provider uses the already-registered vector_db + # Update model_extra with registered values so provider uses the already-registered vector_store if params.model_extra is None: params.model_extra = {} - params.model_extra["provider_vector_db_id"] = registered_vector_db.provider_resource_id - params.model_extra["provider_id"] = registered_vector_db.provider_id + params.model_extra["provider_vector_store_id"] = registered_vector_store.provider_resource_id + params.model_extra["provider_id"] = registered_vector_store.provider_id if embedding_model is not None: params.model_extra["embedding_model"] = embedding_model if embedding_dimension is not None: @@ -171,15 +179,15 @@ class VectorIORouter(VectorIO): logger.debug(f"VectorIORouter.openai_list_vector_stores: limit={limit}") # Route to default provider for now - could aggregate from all providers in the future # call retrieve on each vector dbs to get list of vector stores - vector_dbs = await self.routing_table.get_all_with_type("vector_db") + vector_stores = await self.routing_table.get_all_with_type("vector_store") all_stores = [] - for vector_db in vector_dbs: + for vector_store in vector_stores: try: - provider = await self.routing_table.get_provider_impl(vector_db.identifier) - vector_store = await provider.openai_retrieve_vector_store(vector_db.identifier) + provider = await self.routing_table.get_provider_impl(vector_store.identifier) + vector_store = await provider.openai_retrieve_vector_store(vector_store.identifier) all_stores.append(vector_store) except Exception as e: - logger.error(f"Error retrieving vector store {vector_db.identifier}: {e}") + logger.error(f"Error retrieving vector store {vector_store.identifier}: {e}") continue # Sort by created_at @@ -243,8 +251,7 @@ class VectorIORouter(VectorIO): vector_store_id: str, ) -> 
VectorStoreDeleteResponse: logger.debug(f"VectorIORouter.openai_delete_vector_store: {vector_store_id}") - provider = await self.routing_table.get_provider_impl(vector_store_id) - return await provider.openai_delete_vector_store(vector_store_id) + return await self.routing_table.openai_delete_vector_store(vector_store_id) async def openai_search_vector_store( self, diff --git a/llama_stack/core/routing_tables/common.py b/llama_stack/core/routing_tables/common.py index 8df0a89a9..d6faf93c5 100644 --- a/llama_stack/core/routing_tables/common.py +++ b/llama_stack/core/routing_tables/common.py @@ -41,7 +41,7 @@ async def register_object_with_provider(obj: RoutableObject, p: Any) -> Routable elif api == Api.safety: return await p.register_shield(obj) elif api == Api.vector_io: - return await p.register_vector_db(obj) + return await p.register_vector_store(obj) elif api == Api.datasetio: return await p.register_dataset(obj) elif api == Api.scoring: @@ -57,7 +57,7 @@ async def register_object_with_provider(obj: RoutableObject, p: Any) -> Routable async def unregister_object_from_provider(obj: RoutableObject, p: Any) -> None: api = get_impl_api(p) if api == Api.vector_io: - return await p.unregister_vector_db(obj.identifier) + return await p.unregister_vector_store(obj.identifier) elif api == Api.inference: return await p.unregister_model(obj.identifier) elif api == Api.safety: @@ -108,7 +108,7 @@ class CommonRoutingTableImpl(RoutingTable): elif api == Api.safety: p.shield_store = self elif api == Api.vector_io: - p.vector_db_store = self + p.vector_store_store = self elif api == Api.datasetio: p.dataset_store = self elif api == Api.scoring: @@ -134,12 +134,15 @@ class CommonRoutingTableImpl(RoutingTable): from .scoring_functions import ScoringFunctionsRoutingTable from .shields import ShieldsRoutingTable from .toolgroups import ToolGroupsRoutingTable + from .vector_stores import VectorStoresRoutingTable def apiname_object(): if isinstance(self, ModelsRoutingTable): return ("Inference", "model") elif isinstance(self, ShieldsRoutingTable): return ("Safety", "shield") + elif isinstance(self, VectorStoresRoutingTable): + return ("VectorIO", "vector_store") elif isinstance(self, DatasetsRoutingTable): return ("DatasetIO", "dataset") elif isinstance(self, ScoringFunctionsRoutingTable): diff --git a/llama_stack/core/routing_tables/vector_stores.py b/llama_stack/core/routing_tables/vector_stores.py new file mode 100644 index 000000000..c6c80a01e --- /dev/null +++ b/llama_stack/core/routing_tables/vector_stores.py @@ -0,0 +1,292 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
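Every route-facing method in the new table defined just below follows the same three-step shape: check the access policy, resolve the owning provider, delegate. A minimal sketch of that pattern, assuming `assert_action_allowed` and `get_provider_impl` behave as they are used throughout this file (a sketch only, not part of the change):

```python
# Condensed illustration of the recurring pattern in VectorStoresRoutingTable;
# both helpers are assumptions here, shaped after their call sites in this diff.
from typing import Any


class PatternSketch:
    async def assert_action_allowed(self, action: str, resource_type: str, identifier: str) -> None:
        """Assumed helper: raise if the caller may not perform `action` on the resource."""
        raise NotImplementedError

    async def get_provider_impl(self, identifier: str) -> Any:
        """Assumed helper: resolve the provider that owns the registered vector store."""
        raise NotImplementedError

    async def openai_retrieve_vector_store(self, vector_store_id: str) -> Any:
        # 1) authorize, 2) route, 3) delegate to the provider implementation
        await self.assert_action_allowed("read", "vector_store", vector_store_id)
        provider = await self.get_provider_impl(vector_store_id)
        return await provider.openai_retrieve_vector_store(vector_store_id)
```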
+ +from typing import Any + +from llama_stack.apis.common.errors import ModelNotFoundError, ModelTypeError +from llama_stack.apis.models import ModelType +from llama_stack.apis.resource import ResourceType + +# Removed VectorStores import to avoid exposing public API +from llama_stack.apis.vector_io.vector_io import ( + SearchRankingOptions, + VectorStoreChunkingStrategy, + VectorStoreDeleteResponse, + VectorStoreFileContentsResponse, + VectorStoreFileDeleteResponse, + VectorStoreFileObject, + VectorStoreFileStatus, + VectorStoreObject, + VectorStoreSearchResponsePage, +) +from llama_stack.core.datatypes import ( + VectorStoreWithOwner, +) +from llama_stack.log import get_logger + +from .common import CommonRoutingTableImpl, lookup_model + +logger = get_logger(name=__name__, category="core::routing_tables") + + +class VectorStoresRoutingTable(CommonRoutingTableImpl): + """Internal routing table for vector_store operations. + + Does not inherit from VectorStores to avoid exposing public API endpoints. + Only provides internal routing functionality for VectorIORouter. + """ + + # Internal methods only - no public API exposure + + async def register_vector_store( + self, + vector_store_id: str, + embedding_model: str, + embedding_dimension: int | None = 384, + provider_id: str | None = None, + provider_vector_store_id: str | None = None, + vector_store_name: str | None = None, + ) -> Any: + if provider_id is None: + if len(self.impls_by_provider_id) > 0: + provider_id = list(self.impls_by_provider_id.keys())[0] + if len(self.impls_by_provider_id) > 1: + logger.warning( + f"No provider specified and multiple providers available. Arbitrarily selected the first provider {provider_id}." + ) + else: + raise ValueError("No provider available. Please configure a vector_io provider.") + model = await lookup_model(self, embedding_model) + if model is None: + raise ModelNotFoundError(embedding_model) + if model.model_type != ModelType.embedding: + raise ModelTypeError(embedding_model, model.model_type, ModelType.embedding) + + vector_store = VectorStoreWithOwner( + identifier=vector_store_id, + type=ResourceType.vector_store.value, + provider_id=provider_id, + provider_resource_id=provider_vector_store_id, + embedding_model=embedding_model, + embedding_dimension=embedding_dimension, + vector_store_name=vector_store_name, + ) + await self.register_object(vector_store) + return vector_store + + async def openai_retrieve_vector_store( + self, + vector_store_id: str, + ) -> VectorStoreObject: + await self.assert_action_allowed("read", "vector_store", vector_store_id) + provider = await self.get_provider_impl(vector_store_id) + return await provider.openai_retrieve_vector_store(vector_store_id) + + async def openai_update_vector_store( + self, + vector_store_id: str, + name: str | None = None, + expires_after: dict[str, Any] | None = None, + metadata: dict[str, Any] | None = None, + ) -> VectorStoreObject: + await self.assert_action_allowed("update", "vector_store", vector_store_id) + provider = await self.get_provider_impl(vector_store_id) + return await provider.openai_update_vector_store( + vector_store_id=vector_store_id, + name=name, + expires_after=expires_after, + metadata=metadata, + ) + + async def openai_delete_vector_store( + self, + vector_store_id: str, + ) -> VectorStoreDeleteResponse: + await self.assert_action_allowed("delete", "vector_store", vector_store_id) + provider = await self.get_provider_impl(vector_store_id) + result = await provider.openai_delete_vector_store(vector_store_id) + 
await self.unregister_vector_store(vector_store_id) + return result + + async def unregister_vector_store(self, vector_store_id: str) -> None: + """Remove the vector store from the routing table registry.""" + try: + vector_store_obj = await self.get_object_by_identifier("vector_store", vector_store_id) + if vector_store_obj: + await self.unregister_object(vector_store_obj) + except Exception as e: + # Log the error but don't fail the operation + logger.warning(f"Failed to unregister vector store {vector_store_id} from routing table: {e}") + + async def openai_search_vector_store( + self, + vector_store_id: str, + query: str | list[str], + filters: dict[str, Any] | None = None, + max_num_results: int | None = 10, + ranking_options: SearchRankingOptions | None = None, + rewrite_query: bool | None = False, + search_mode: str | None = "vector", + ) -> VectorStoreSearchResponsePage: + await self.assert_action_allowed("read", "vector_store", vector_store_id) + provider = await self.get_provider_impl(vector_store_id) + return await provider.openai_search_vector_store( + vector_store_id=vector_store_id, + query=query, + filters=filters, + max_num_results=max_num_results, + ranking_options=ranking_options, + rewrite_query=rewrite_query, + search_mode=search_mode, + ) + + async def openai_attach_file_to_vector_store( + self, + vector_store_id: str, + file_id: str, + attributes: dict[str, Any] | None = None, + chunking_strategy: VectorStoreChunkingStrategy | None = None, + ) -> VectorStoreFileObject: + await self.assert_action_allowed("update", "vector_store", vector_store_id) + provider = await self.get_provider_impl(vector_store_id) + return await provider.openai_attach_file_to_vector_store( + vector_store_id=vector_store_id, + file_id=file_id, + attributes=attributes, + chunking_strategy=chunking_strategy, + ) + + async def openai_list_files_in_vector_store( + self, + vector_store_id: str, + limit: int | None = 20, + order: str | None = "desc", + after: str | None = None, + before: str | None = None, + filter: VectorStoreFileStatus | None = None, + ) -> list[VectorStoreFileObject]: + await self.assert_action_allowed("read", "vector_store", vector_store_id) + provider = await self.get_provider_impl(vector_store_id) + return await provider.openai_list_files_in_vector_store( + vector_store_id=vector_store_id, + limit=limit, + order=order, + after=after, + before=before, + filter=filter, + ) + + async def openai_retrieve_vector_store_file( + self, + vector_store_id: str, + file_id: str, + ) -> VectorStoreFileObject: + await self.assert_action_allowed("read", "vector_store", vector_store_id) + provider = await self.get_provider_impl(vector_store_id) + return await provider.openai_retrieve_vector_store_file( + vector_store_id=vector_store_id, + file_id=file_id, + ) + + async def openai_retrieve_vector_store_file_contents( + self, + vector_store_id: str, + file_id: str, + ) -> VectorStoreFileContentsResponse: + await self.assert_action_allowed("read", "vector_store", vector_store_id) + provider = await self.get_provider_impl(vector_store_id) + return await provider.openai_retrieve_vector_store_file_contents( + vector_store_id=vector_store_id, + file_id=file_id, + ) + + async def openai_update_vector_store_file( + self, + vector_store_id: str, + file_id: str, + attributes: dict[str, Any], + ) -> VectorStoreFileObject: + await self.assert_action_allowed("update", "vector_store", vector_store_id) + provider = await self.get_provider_impl(vector_store_id) + return await 
provider.openai_update_vector_store_file( + vector_store_id=vector_store_id, + file_id=file_id, + attributes=attributes, + ) + + async def openai_delete_vector_store_file( + self, + vector_store_id: str, + file_id: str, + ) -> VectorStoreFileDeleteResponse: + await self.assert_action_allowed("delete", "vector_store", vector_store_id) + provider = await self.get_provider_impl(vector_store_id) + return await provider.openai_delete_vector_store_file( + vector_store_id=vector_store_id, + file_id=file_id, + ) + + async def openai_create_vector_store_file_batch( + self, + vector_store_id: str, + file_ids: list[str], + attributes: dict[str, Any] | None = None, + chunking_strategy: Any | None = None, + ): + await self.assert_action_allowed("update", "vector_store", vector_store_id) + provider = await self.get_provider_impl(vector_store_id) + return await provider.openai_create_vector_store_file_batch( + vector_store_id=vector_store_id, + file_ids=file_ids, + attributes=attributes, + chunking_strategy=chunking_strategy, + ) + + async def openai_retrieve_vector_store_file_batch( + self, + batch_id: str, + vector_store_id: str, + ): + await self.assert_action_allowed("read", "vector_store", vector_store_id) + provider = await self.get_provider_impl(vector_store_id) + return await provider.openai_retrieve_vector_store_file_batch( + batch_id=batch_id, + vector_store_id=vector_store_id, + ) + + async def openai_list_files_in_vector_store_file_batch( + self, + batch_id: str, + vector_store_id: str, + after: str | None = None, + before: str | None = None, + filter: str | None = None, + limit: int | None = 20, + order: str | None = "desc", + ): + await self.assert_action_allowed("read", "vector_store", vector_store_id) + provider = await self.get_provider_impl(vector_store_id) + return await provider.openai_list_files_in_vector_store_file_batch( + batch_id=batch_id, + vector_store_id=vector_store_id, + after=after, + before=before, + filter=filter, + limit=limit, + order=order, + ) + + async def openai_cancel_vector_store_file_batch( + self, + batch_id: str, + vector_store_id: str, + ): + await self.assert_action_allowed("update", "vector_store", vector_store_id) + provider = await self.get_provider_impl(vector_store_id) + return await provider.openai_cancel_vector_store_file_batch( + batch_id=batch_id, + vector_store_id=vector_store_id, + ) diff --git a/llama_stack/core/server/auth_providers.py b/llama_stack/core/server/auth_providers.py index 05a21c8d4..0fe5f1558 100644 --- a/llama_stack/core/server/auth_providers.py +++ b/llama_stack/core/server/auth_providers.py @@ -72,13 +72,30 @@ class AuthProvider(ABC): def get_attributes_from_claims(claims: dict[str, str], mapping: dict[str, str]) -> dict[str, list[str]]: attributes: dict[str, list[str]] = {} for claim_key, attribute_key in mapping.items(): - if claim_key not in claims: + # First try dot notation for nested traversal (e.g., "resource_access.llamastack.roles") + # Then fall back to literal key with dots (e.g., "my.dotted.key") + claim: object = claims + keys = claim_key.split(".") + for key in keys: + if isinstance(claim, dict) and key in claim: + claim = claim[key] + else: + claim = None + break + + if claim is None and claim_key in claims: + # Fall back to checking if claim_key exists as a literal key + claim = claims[claim_key] + + if claim is None: continue - claim = claims[claim_key] + if isinstance(claim, list): values = claim - else: + elif isinstance(claim, str): values = claim.split() + else: + continue if attribute_key in attributes: 
attributes[attribute_key].extend(values) diff --git a/llama_stack/core/server/quota.py b/llama_stack/core/server/quota.py index 693f224c3..689f0e4c3 100644 --- a/llama_stack/core/server/quota.py +++ b/llama_stack/core/server/quota.py @@ -10,10 +10,10 @@ from datetime import UTC, datetime, timedelta from starlette.types import ASGIApp, Receive, Scope, Send +from llama_stack.core.storage.datatypes import KVStoreReference, StorageBackendType from llama_stack.log import get_logger from llama_stack.providers.utils.kvstore.api import KVStore -from llama_stack.providers.utils.kvstore.config import KVStoreConfig, SqliteKVStoreConfig -from llama_stack.providers.utils.kvstore.kvstore import kvstore_impl +from llama_stack.providers.utils.kvstore.kvstore import _KVSTORE_BACKENDS, kvstore_impl logger = get_logger(name=__name__, category="core::server") @@ -33,7 +33,7 @@ class QuotaMiddleware: def __init__( self, app: ASGIApp, - kv_config: KVStoreConfig, + kv_config: KVStoreReference, anonymous_max_requests: int, authenticated_max_requests: int, window_seconds: int = 86400, @@ -45,15 +45,15 @@ class QuotaMiddleware: self.authenticated_max_requests = authenticated_max_requests self.window_seconds = window_seconds - if isinstance(self.kv_config, SqliteKVStoreConfig): - logger.warning( - "QuotaMiddleware: Using SQLite backend. Expiry/TTL is not enforced; cleanup is manual. " - f"window_seconds={self.window_seconds}" - ) - async def _get_kv(self) -> KVStore: if self.kv is None: self.kv = await kvstore_impl(self.kv_config) + backend_config = _KVSTORE_BACKENDS.get(self.kv_config.backend) + if backend_config and backend_config.type == StorageBackendType.KV_SQLITE: + logger.warning( + "QuotaMiddleware: Using SQLite backend. Expiry/TTL is not enforced; cleanup is manual. 
" + f"window_seconds={self.window_seconds}" + ) return self.kv async def __call__(self, scope: Scope, receive: Receive, send: Send): diff --git a/llama_stack/core/server/routes.py b/llama_stack/core/server/routes.py index 4970d0bf8..ed76ea86f 100644 --- a/llama_stack/core/server/routes.py +++ b/llama_stack/core/server/routes.py @@ -13,7 +13,6 @@ from aiohttp import hdrs from starlette.routing import Route from llama_stack.apis.datatypes import Api, ExternalApiSpec -from llama_stack.apis.tools import RAGToolRuntime, SpecialToolGroup from llama_stack.core.resolver import api_protocol_map from llama_stack.schema_utils import WebMethod @@ -25,33 +24,16 @@ RouteImpls = dict[str, PathImpl] RouteMatch = tuple[EndpointFunc, PathParams, str, WebMethod] -def toolgroup_protocol_map(): - return { - SpecialToolGroup.rag_tool: RAGToolRuntime, - } - - def get_all_api_routes( external_apis: dict[Api, ExternalApiSpec] | None = None, ) -> dict[Api, list[tuple[Route, WebMethod]]]: apis = {} protocols = api_protocol_map(external_apis) - toolgroup_protocols = toolgroup_protocol_map() for api, protocol in protocols.items(): routes = [] protocol_methods = inspect.getmembers(protocol, predicate=inspect.isfunction) - # HACK ALERT - if api == Api.tool_runtime: - for tool_group in SpecialToolGroup: - sub_protocol = toolgroup_protocols[tool_group] - sub_protocol_methods = inspect.getmembers(sub_protocol, predicate=inspect.isfunction) - for name, method in sub_protocol_methods: - if not hasattr(method, "__webmethod__"): - continue - protocol_methods.append((f"{tool_group.value}.{name}", method)) - for name, method in protocol_methods: # Get all webmethods for this method (supports multiple decorators) webmethods = getattr(method, "__webmethods__", []) diff --git a/llama_stack/core/stack.py b/llama_stack/core/stack.py index 733b55262..49100b4bc 100644 --- a/llama_stack/core/stack.py +++ b/llama_stack/core/stack.py @@ -32,16 +32,26 @@ from llama_stack.apis.scoring_functions import ScoringFunctions from llama_stack.apis.shields import Shields from llama_stack.apis.synthetic_data_generation import SyntheticDataGeneration from llama_stack.apis.telemetry import Telemetry -from llama_stack.apis.tools import RAGToolRuntime, ToolGroups, ToolRuntime +from llama_stack.apis.tools import ToolGroups, ToolRuntime from llama_stack.apis.vector_io import VectorIO from llama_stack.core.conversations.conversations import ConversationServiceConfig, ConversationServiceImpl -from llama_stack.core.datatypes import Provider, StackRunConfig +from llama_stack.core.datatypes import Provider, StackRunConfig, VectorStoresConfig from llama_stack.core.distribution import get_provider_registry from llama_stack.core.inspect import DistributionInspectConfig, DistributionInspectImpl from llama_stack.core.prompts.prompts import PromptServiceConfig, PromptServiceImpl from llama_stack.core.providers import ProviderImpl, ProviderImplConfig from llama_stack.core.resolver import ProviderRegistry, resolve_impls from llama_stack.core.routing_tables.common import CommonRoutingTableImpl +from llama_stack.core.storage.datatypes import ( + InferenceStoreReference, + KVStoreReference, + ServerStoresConfig, + SqliteKVStoreConfig, + SqliteSqlStoreConfig, + SqlStoreReference, + StorageBackendConfig, + StorageConfig, +) from llama_stack.core.store.registry import create_dist_registry from llama_stack.core.utils.dynamic import instantiate_class_type from llama_stack.log import get_logger @@ -70,7 +80,6 @@ class LlamaStack( Inspect, ToolGroups, ToolRuntime, - 
RAGToolRuntime, Files, Prompts, Conversations, @@ -98,33 +107,9 @@ REGISTRY_REFRESH_TASK = None TEST_RECORDING_CONTEXT = None -async def validate_default_embedding_model(impls: dict[Api, Any]): - """Validate that at most one embedding model is marked as default.""" - if Api.models not in impls: - return - - models_impl = impls[Api.models] - response = await models_impl.list_models() - models_list = response.data if hasattr(response, "data") else response - - default_embedding_models = [] - for model in models_list: - if model.model_type == "embedding" and model.metadata.get("default_configured") is True: - default_embedding_models.append(model.identifier) - - if len(default_embedding_models) > 1: - raise ValueError( - f"Multiple embedding models marked as default_configured=True: {default_embedding_models}. " - "Only one embedding model can be marked as default." - ) - - if default_embedding_models: - logger.info(f"Default embedding model configured: {default_embedding_models[0]}") - - async def register_resources(run_config: StackRunConfig, impls: dict[Api, Any]): for rsrc, api, register_method, list_method in RESOURCES: - objects = getattr(run_config, rsrc) + objects = getattr(run_config.registered_resources, rsrc) if api not in impls: continue @@ -152,7 +137,41 @@ async def register_resources(run_config: StackRunConfig, impls: dict[Api, Any]): f"{rsrc.capitalize()}: {obj.identifier} served by {obj.provider_id}", ) - await validate_default_embedding_model(impls) + +async def validate_vector_stores_config(vector_stores_config: VectorStoresConfig | None, impls: dict[Api, Any]): + """Validate vector stores configuration.""" + if vector_stores_config is None: + return + + default_embedding_model = vector_stores_config.default_embedding_model + if default_embedding_model is None: + return + + provider_id = default_embedding_model.provider_id + model_id = default_embedding_model.model_id + default_model_id = f"{provider_id}/{model_id}" + + if Api.models not in impls: + raise ValueError(f"Models API is not available but vector_stores config requires model '{default_model_id}'") + + models_impl = impls[Api.models] + response = await models_impl.list_models() + models_list = {m.identifier: m for m in response.data if m.model_type == "embedding"} + + default_model = models_list.get(default_model_id) + if default_model is None: + raise ValueError(f"Embedding model '{default_model_id}' not found. 
Available embedding models: {list(models_list.keys())}") + + embedding_dimension = default_model.metadata.get("embedding_dimension") + if embedding_dimension is None: + raise ValueError(f"Embedding model '{default_model_id}' is missing 'embedding_dimension' in metadata") + + try: + int(embedding_dimension) + except (TypeError, ValueError) as err: + raise ValueError(f"Embedding dimension '{embedding_dimension}' cannot be converted to an integer") from err + + logger.debug(f"Validated default embedding model: {default_model_id} (dimension: {embedding_dimension})") class EnvVarError(Exception): @@ -329,6 +348,25 @@ def add_internal_implementations(impls: dict[Api, Any], run_config: StackRunConf impls[Api.conversations] = conversations_impl +def _initialize_storage(run_config: StackRunConfig): + kv_backends: dict[str, StorageBackendConfig] = {} + sql_backends: dict[str, StorageBackendConfig] = {} + for backend_name, backend_config in run_config.storage.backends.items(): + backend_type = backend_config.type.value + if backend_type.startswith("kv_"): + kv_backends[backend_name] = backend_config + elif backend_type.startswith("sql_"): + sql_backends[backend_name] = backend_config + else: + raise ValueError(f"Unknown storage backend type: {backend_type}") + + from llama_stack.providers.utils.kvstore.kvstore import register_kvstore_backends + from llama_stack.providers.utils.sqlstore.sqlstore import register_sqlstore_backends + + register_kvstore_backends(kv_backends) + register_sqlstore_backends(sql_backends) + + class Stack: def __init__(self, run_config: StackRunConfig, provider_registry: ProviderRegistry | None = None): self.run_config = run_config @@ -347,7 +385,11 @@ class Stack: TEST_RECORDING_CONTEXT.__enter__() logger.info(f"API recording enabled: mode={os.environ.get('LLAMA_STACK_TEST_INFERENCE_MODE')}") - dist_registry, _ = await create_dist_registry(self.run_config.metadata_store, self.run_config.image_name) + _initialize_storage(self.run_config) + stores = self.run_config.storage.stores + if not stores.metadata: + raise ValueError("storage.stores.metadata must be configured with a kv_* backend") + dist_registry, _ = await create_dist_registry(stores.metadata, self.run_config.image_name) policy = self.run_config.server.auth.access_policy if self.run_config.server.auth else [] internal_impls = {} @@ -367,8 +409,8 @@ class Stack: await impls[Api.conversations].initialize() await register_resources(self.run_config, impls) - await refresh_registry_once(impls) + + await validate_vector_stores_config(self.run_config.vector_stores, impls) self.impls = impls def create_registry_refresh_task(self): @@ -488,5 +530,16 @@ def run_config_from_adhoc_config_spec( image_name="distro-test", apis=list(provider_configs_by_api.keys()), providers=provider_configs_by_api, + storage=StorageConfig( + backends={ + "kv_default": SqliteKVStoreConfig(db_path=f"{distro_dir}/kvstore.db"), + "sql_default": SqliteSqlStoreConfig(db_path=f"{distro_dir}/sql_store.db"), + }, + stores=ServerStoresConfig( + metadata=KVStoreReference(backend="kv_default", namespace="registry"), + inference=InferenceStoreReference(backend="sql_default", table_name="inference_store"), + conversations=SqlStoreReference(backend="sql_default", table_name="openai_conversations"), + ), + ), ) return config diff --git a/llama_stack/providers/inline/tool_runtime/__init__.py b/llama_stack/core/storage/__init__.py similarity index 100% rename from llama_stack/providers/inline/tool_runtime/__init__.py rename to llama_stack/core/storage/__init__.py diff --git a/llama_stack/core/storage/datatypes.py 
b/llama_stack/core/storage/datatypes.py new file mode 100644 index 000000000..9df170e10 --- /dev/null +++ b/llama_stack/core/storage/datatypes.py @@ -0,0 +1,283 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +import re +from abc import abstractmethod +from enum import StrEnum +from pathlib import Path +from typing import Annotated, Literal + +from pydantic import BaseModel, Field, field_validator + + +class StorageBackendType(StrEnum): + KV_REDIS = "kv_redis" + KV_SQLITE = "kv_sqlite" + KV_POSTGRES = "kv_postgres" + KV_MONGODB = "kv_mongodb" + SQL_SQLITE = "sql_sqlite" + SQL_POSTGRES = "sql_postgres" + + +class CommonConfig(BaseModel): + namespace: str | None = Field( + default=None, + description="All keys will be prefixed with this namespace", + ) + + +class RedisKVStoreConfig(CommonConfig): + type: Literal[StorageBackendType.KV_REDIS] = StorageBackendType.KV_REDIS + host: str = "localhost" + port: int = 6379 + + @property + def url(self) -> str: + return f"redis://{self.host}:{self.port}" + + @classmethod + def pip_packages(cls) -> list[str]: + return ["redis"] + + @classmethod + def sample_run_config(cls): + return { + "type": StorageBackendType.KV_REDIS.value, + "host": "${env.REDIS_HOST:=localhost}", + "port": "${env.REDIS_PORT:=6379}", + } + + +class SqliteKVStoreConfig(CommonConfig): + type: Literal[StorageBackendType.KV_SQLITE] = StorageBackendType.KV_SQLITE + db_path: str = Field( + description="File path for the sqlite database", + ) + + @classmethod + def pip_packages(cls) -> list[str]: + return ["aiosqlite"] + + @classmethod + def sample_run_config(cls, __distro_dir__: str, db_name: str = "kvstore.db"): + return { + "type": StorageBackendType.KV_SQLITE.value, + "db_path": "${env.SQLITE_STORE_DIR:=" + __distro_dir__ + "}/" + db_name, + } + + +class PostgresKVStoreConfig(CommonConfig): + type: Literal[StorageBackendType.KV_POSTGRES] = StorageBackendType.KV_POSTGRES + host: str = "localhost" + port: int | str = 5432 + db: str = "llamastack" + user: str + password: str | None = None + ssl_mode: str | None = None + ca_cert_path: str | None = None + table_name: str = "llamastack_kvstore" + + @classmethod + def sample_run_config(cls, table_name: str = "llamastack_kvstore", **kwargs): + return { + "type": StorageBackendType.KV_POSTGRES.value, + "host": "${env.POSTGRES_HOST:=localhost}", + "port": "${env.POSTGRES_PORT:=5432}", + "db": "${env.POSTGRES_DB:=llamastack}", + "user": "${env.POSTGRES_USER:=llamastack}", + "password": "${env.POSTGRES_PASSWORD:=llamastack}", + "table_name": "${env.POSTGRES_TABLE_NAME:=" + table_name + "}", + } + + @field_validator("table_name") + @classmethod + def validate_table_name(cls, v: str) -> str: + # PostgreSQL identifiers rules: + # - Must start with a letter or underscore + # - Can contain letters, numbers, and underscores + # - Maximum length is 63 bytes + pattern = r"^[a-zA-Z_][a-zA-Z0-9_]*$" + if not re.match(pattern, v): + raise ValueError( + "Invalid table name. 
Must start with letter or underscore and contain only letters, numbers, and underscores" + ) + if len(v) > 63: + raise ValueError("Table name must not exceed 63 characters") + return v + + @classmethod + def pip_packages(cls) -> list[str]: + return ["psycopg2-binary"] + + +class MongoDBKVStoreConfig(CommonConfig): + type: Literal[StorageBackendType.KV_MONGODB] = StorageBackendType.KV_MONGODB + host: str = "localhost" + port: int = 27017 + db: str = "llamastack" + user: str | None = None + password: str | None = None + collection_name: str = "llamastack_kvstore" + + @classmethod + def pip_packages(cls) -> list[str]: + return ["pymongo"] + + @classmethod + def sample_run_config(cls, collection_name: str = "llamastack_kvstore"): + return { + "type": StorageBackendType.KV_MONGODB.value, + "host": "${env.MONGODB_HOST:=localhost}", + "port": "${env.MONGODB_PORT:=27017}", + "db": "${env.MONGODB_DB}", + "user": "${env.MONGODB_USER}", + "password": "${env.MONGODB_PASSWORD}", + "collection_name": "${env.MONGODB_COLLECTION_NAME:=" + collection_name + "}", + } + + +class SqlAlchemySqlStoreConfig(BaseModel): + @property + @abstractmethod + def engine_str(self) -> str: ... + + # TODO: move this when we have a better way to specify dependencies with internal APIs + @classmethod + def pip_packages(cls) -> list[str]: + return ["sqlalchemy[asyncio]"] + + +class SqliteSqlStoreConfig(SqlAlchemySqlStoreConfig): + type: Literal[StorageBackendType.SQL_SQLITE] = StorageBackendType.SQL_SQLITE + db_path: str = Field( + description="Database path, e.g. ~/.llama/distributions/ollama/sqlstore.db", + ) + + @property + def engine_str(self) -> str: + return "sqlite+aiosqlite:///" + Path(self.db_path).expanduser().as_posix() + + @classmethod + def sample_run_config(cls, __distro_dir__: str, db_name: str = "sqlstore.db"): + return { + "type": StorageBackendType.SQL_SQLITE.value, + "db_path": "${env.SQLITE_STORE_DIR:=" + __distro_dir__ + "}/" + db_name, + } + + @classmethod + def pip_packages(cls) -> list[str]: + return super().pip_packages() + ["aiosqlite"] + + +class PostgresSqlStoreConfig(SqlAlchemySqlStoreConfig): + type: Literal[StorageBackendType.SQL_POSTGRES] = StorageBackendType.SQL_POSTGRES + host: str = "localhost" + port: int | str = 5432 + db: str = "llamastack" + user: str + password: str | None = None + + @property + def engine_str(self) -> str: + return f"postgresql+asyncpg://{self.user}:{self.password}@{self.host}:{self.port}/{self.db}" + + @classmethod + def pip_packages(cls) -> list[str]: + return super().pip_packages() + ["asyncpg"] + + @classmethod + def sample_run_config(cls, **kwargs): + return { + "type": StorageBackendType.SQL_POSTGRES.value, + "host": "${env.POSTGRES_HOST:=localhost}", + "port": "${env.POSTGRES_PORT:=5432}", + "db": "${env.POSTGRES_DB:=llamastack}", + "user": "${env.POSTGRES_USER:=llamastack}", + "password": "${env.POSTGRES_PASSWORD:=llamastack}", + } + + +# reference = (backend_name, table_name) +class SqlStoreReference(BaseModel): + """A reference to a 'SQL-like' persistent store. A table name must be provided.""" + + table_name: str = Field( + description="Name of the table to use for the SqlStore", + ) + + backend: str = Field( + description="Name of backend from storage.backends", + ) + + +# reference = (backend_name, namespace) +class KVStoreReference(BaseModel): + """A reference to a 'key-value' persistent store. 
A namespace must be provided.""" + + namespace: str = Field( + description="Key prefix for KVStore backends", + ) + + backend: str = Field( + description="Name of backend from storage.backends", + ) + + +StorageBackendConfig = Annotated[ + RedisKVStoreConfig + | SqliteKVStoreConfig + | PostgresKVStoreConfig + | MongoDBKVStoreConfig + | SqliteSqlStoreConfig + | PostgresSqlStoreConfig, + Field(discriminator="type"), +] + + +class InferenceStoreReference(SqlStoreReference): + """Inference store configuration with queue tuning.""" + + max_write_queue_size: int = Field( + default=10000, + description="Max queued writes for inference store", + ) + num_writers: int = Field( + default=4, + description="Number of concurrent background writers", + ) + + +class ResponsesStoreReference(InferenceStoreReference): + """Responses store configuration with queue tuning.""" + + +class ServerStoresConfig(BaseModel): + metadata: KVStoreReference | None = Field( + default=None, + description="Metadata store configuration (uses KV backend)", + ) + inference: InferenceStoreReference | None = Field( + default=None, + description="Inference store configuration (uses SQL backend)", + ) + conversations: SqlStoreReference | None = Field( + default=None, + description="Conversations store configuration (uses SQL backend)", + ) + responses: ResponsesStoreReference | None = Field( + default=None, + description="Responses store configuration (uses SQL backend)", + ) + + +class StorageConfig(BaseModel): + backends: dict[str, StorageBackendConfig] = Field( + description="Named backend configurations (e.g., 'default', 'cache')", + ) + stores: ServerStoresConfig = Field( + default_factory=lambda: ServerStoresConfig(), + description="Named references to storage backends used by the stack core", + ) diff --git a/llama_stack/core/store/registry.py b/llama_stack/core/store/registry.py index 04581bab5..6ff9e575b 100644 --- a/llama_stack/core/store/registry.py +++ b/llama_stack/core/store/registry.py @@ -11,10 +11,9 @@ from typing import Protocol import pydantic from llama_stack.core.datatypes import RoutableObjectWithProvider -from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR +from llama_stack.core.storage.datatypes import KVStoreReference from llama_stack.log import get_logger from llama_stack.providers.utils.kvstore import KVStore, kvstore_impl -from llama_stack.providers.utils.kvstore.config import KVStoreConfig, SqliteKVStoreConfig logger = get_logger(__name__, category="core::registry") @@ -191,16 +190,10 @@ class CachedDiskDistributionRegistry(DiskDistributionRegistry): async def create_dist_registry( - metadata_store: KVStoreConfig | None, - image_name: str, + metadata_store: KVStoreReference, image_name: str ) -> tuple[CachedDiskDistributionRegistry, KVStore]: # instantiate kvstore for storing and retrieving distribution metadata - if metadata_store: - dist_kvstore = await kvstore_impl(metadata_store) - else: - dist_kvstore = await kvstore_impl( - SqliteKVStoreConfig(db_path=(DISTRIBS_BASE_DIR / image_name / "kvstore.db").as_posix()) - ) + dist_kvstore = await kvstore_impl(metadata_store) dist_registry = CachedDiskDistributionRegistry(dist_kvstore) await dist_registry.initialize() return dist_registry, dist_kvstore diff --git a/llama_stack/core/ui/README.md b/llama_stack/core/ui/README.md index f1d85454b..37f1501c9 100644 --- a/llama_stack/core/ui/README.md +++ b/llama_stack/core/ui/README.md @@ -9,7 +9,7 @@ 1. Start up Llama Stack API server. 
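The datatypes above split persistence into named backends (`storage.backends`) and typed references to them (`storage.stores`): a reference carries only a backend name plus a namespace or table name, and `_initialize_storage` in `stack.py` registers the backends before anything resolves a reference. A hedged sketch of that wiring, assuming `register_kvstore_backends` and `kvstore_impl` accept the shapes they are called with elsewhere in this diff, and that the resolved store exposes the usual async `set`/`get`:

```python
# Sketch only: mirrors how stack.py registers backends and how quota.py later
# resolves a KVStoreReference; exact helper signatures may differ.
import asyncio

from llama_stack.core.storage.datatypes import KVStoreReference, SqliteKVStoreConfig
from llama_stack.providers.utils.kvstore.kvstore import kvstore_impl, register_kvstore_backends


async def main() -> None:
    # 1) register named backends once at startup (done by _initialize_storage)
    register_kvstore_backends({"kv_default": SqliteKVStoreConfig(db_path="/tmp/kvstore.db")})

    # 2) any component holding a reference can now resolve a concrete store
    kv = await kvstore_impl(KVStoreReference(backend="kv_default", namespace="registry"))
    await kv.set("example-key", "example-value")
    print(await kv.get("example-key"))


asyncio.run(main())
```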
More details [here](https://llamastack.github.io/latest/getting_started/index.html). ``` -llama stack build --distro together --image-type venv +llama stack list-deps together | xargs -L1 uv pip install llama stack run together ``` diff --git a/llama_stack/core/ui/page/playground/tools.py b/llama_stack/core/ui/page/playground/tools.py index 4ee9d2204..16fd464ee 100644 --- a/llama_stack/core/ui/page/playground/tools.py +++ b/llama_stack/core/ui/page/playground/tools.py @@ -32,7 +32,7 @@ def tool_chat_page(): tool_groups_list = [tool_group.identifier for tool_group in tool_groups] mcp_tools_list = [tool for tool in tool_groups_list if tool.startswith("mcp::")] builtin_tools_list = [tool for tool in tool_groups_list if not tool.startswith("mcp::")] - selected_vector_dbs = [] + selected_vector_stores = [] def reset_agent(): st.session_state.clear() @@ -55,13 +55,13 @@ def tool_chat_page(): ) if "builtin::rag" in toolgroup_selection: - vector_dbs = llama_stack_api.client.vector_dbs.list() or [] - if not vector_dbs: + vector_stores = llama_stack_api.client.vector_stores.list() or [] + if not vector_stores: st.info("No vector databases available for selection.") - vector_dbs = [vector_db.identifier for vector_db in vector_dbs] - selected_vector_dbs = st.multiselect( + vector_stores = [vector_store.identifier for vector_store in vector_stores] + selected_vector_stores = st.multiselect( label="Select Document Collections to use in RAG queries", - options=vector_dbs, + options=vector_stores, on_change=reset_agent, ) @@ -119,7 +119,7 @@ def tool_chat_page(): tool_dict = dict( name="builtin::rag", args={ - "vector_db_ids": list(selected_vector_dbs), + "vector_store_ids": list(selected_vector_stores), }, ) toolgroup_selection[i] = tool_dict diff --git a/llama_stack/distributions/ci-tests/build.yaml b/llama_stack/distributions/ci-tests/build.yaml index 191d0ae59..3cf43de15 100644 --- a/llama_stack/distributions/ci-tests/build.yaml +++ b/llama_stack/distributions/ci-tests/build.yaml @@ -25,6 +25,8 @@ distribution_spec: - provider_type: inline::milvus - provider_type: remote::chromadb - provider_type: remote::pgvector + - provider_type: remote::qdrant + - provider_type: remote::weaviate files: - provider_type: inline::localfs safety: @@ -46,7 +48,6 @@ distribution_spec: tool_runtime: - provider_type: remote::brave-search - provider_type: remote::tavily-search - - provider_type: inline::rag-runtime - provider_type: remote::model-context-protocol batches: - provider_type: inline::reference diff --git a/llama_stack/distributions/ci-tests/run.yaml b/llama_stack/distributions/ci-tests/run.yaml index a6a6b7c0d..f403527fc 100644 --- a/llama_stack/distributions/ci-tests/run.yaml +++ b/llama_stack/distributions/ci-tests/run.yaml @@ -93,30 +93,30 @@ providers: - provider_id: faiss provider_type: inline::faiss config: - kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/faiss_store.db + persistence: + namespace: vector_io::faiss + backend: kv_default - provider_id: sqlite-vec provider_type: inline::sqlite-vec config: db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/sqlite_vec.db - kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/sqlite_vec_registry.db + persistence: + namespace: vector_io::sqlite_vec + backend: kv_default - provider_id: ${env.MILVUS_URL:+milvus} provider_type: inline::milvus config: db_path: ${env.MILVUS_DB_PATH:=~/.llama/distributions/ci-tests}/milvus.db - kvstore: - type: sqlite - db_path: 
${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/milvus_registry.db + persistence: + namespace: vector_io::milvus + backend: kv_default - provider_id: ${env.CHROMADB_URL:+chromadb} provider_type: remote::chromadb config: url: ${env.CHROMADB_URL:=} - kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests/}/chroma_remote_registry.db + persistence: + namespace: vector_io::chroma_remote + backend: kv_default - provider_id: ${env.PGVECTOR_DB:+pgvector} provider_type: remote::pgvector config: @@ -125,17 +125,32 @@ providers: db: ${env.PGVECTOR_DB:=} user: ${env.PGVECTOR_USER:=} password: ${env.PGVECTOR_PASSWORD:=} - kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/pgvector_registry.db + persistence: + namespace: vector_io::pgvector + backend: kv_default + - provider_id: ${env.QDRANT_URL:+qdrant} + provider_type: remote::qdrant + config: + api_key: ${env.QDRANT_API_KEY:=} + persistence: + namespace: vector_io::qdrant_remote + backend: kv_default + - provider_id: ${env.WEAVIATE_CLUSTER_URL:+weaviate} + provider_type: remote::weaviate + config: + weaviate_api_key: null + weaviate_cluster_url: ${env.WEAVIATE_CLUSTER_URL:=localhost:8080} + persistence: + namespace: vector_io::weaviate + backend: kv_default files: - provider_id: meta-reference-files provider_type: inline::localfs config: storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/ci-tests/files} metadata_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/files_metadata.db + table_name: files_metadata + backend: sql_default safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -147,12 +162,15 @@ providers: - provider_id: meta-reference provider_type: inline::meta-reference config: - persistence_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/agents_store.db - responses_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/responses_store.db + persistence: + agent_state: + namespace: agents + backend: kv_default + responses: + table_name: responses + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 post_training: - provider_id: torchtune-cpu provider_type: inline::torchtune-cpu @@ -163,21 +181,21 @@ providers: provider_type: inline::meta-reference config: kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/meta_reference_eval.db + namespace: eval + backend: kv_default datasetio: - provider_id: huggingface provider_type: remote::huggingface config: kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/huggingface_datasetio.db + namespace: datasetio::huggingface + backend: kv_default - provider_id: localfs provider_type: inline::localfs config: kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/localfs_datasetio.db + namespace: datasetio::localfs + backend: kv_default scoring: - provider_id: basic provider_type: inline::basic @@ -198,8 +216,6 @@ providers: config: api_key: ${env.TAVILY_SEARCH_API_KEY:=} max_results: 3 - - provider_id: rag-runtime - provider_type: inline::rag-runtime - provider_id: model-context-protocol provider_type: remote::model-context-protocol batches: @@ -207,35 +223,50 @@ providers: provider_type: inline::reference config: kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/batches.db -metadata_store: - type: sqlite - 
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/registry.db -inference_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/inference_store.db -conversations_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/conversations.db -models: [] -shields: -- shield_id: llama-guard - provider_id: ${env.SAFETY_MODEL:+llama-guard} - provider_shield_id: ${env.SAFETY_MODEL:=} -- shield_id: code-scanner - provider_id: ${env.CODE_SCANNER_MODEL:+code-scanner} - provider_shield_id: ${env.CODE_SCANNER_MODEL:=} -vector_dbs: [] -datasets: [] -scoring_fns: [] -benchmarks: [] -tool_groups: -- toolgroup_id: builtin::websearch - provider_id: tavily-search -- toolgroup_id: builtin::rag - provider_id: rag-runtime + namespace: batches + backend: kv_default +storage: + backends: + kv_default: + type: kv_sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/kvstore.db + sql_default: + type: sql_sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/sql_store.db + stores: + metadata: + namespace: registry + backend: kv_default + inference: + table_name: inference_store + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 + conversations: + table_name: openai_conversations + backend: sql_default +registered_resources: + models: [] + shields: + - shield_id: llama-guard + provider_id: ${env.SAFETY_MODEL:+llama-guard} + provider_shield_id: ${env.SAFETY_MODEL:=} + - shield_id: code-scanner + provider_id: ${env.CODE_SCANNER_MODEL:+code-scanner} + provider_shield_id: ${env.CODE_SCANNER_MODEL:=} + vector_dbs: [] + datasets: [] + scoring_fns: [] + benchmarks: [] + tool_groups: + - toolgroup_id: builtin::websearch + provider_id: tavily-search server: port: 8321 telemetry: enabled: true +vector_stores: + default_provider_id: faiss + default_embedding_model: + provider_id: sentence-transformers + model_id: nomic-ai/nomic-embed-text-v1.5 diff --git a/llama_stack/distributions/dell/build.yaml b/llama_stack/distributions/dell/build.yaml index 7bc26ca9e..0275a47a1 100644 --- a/llama_stack/distributions/dell/build.yaml +++ b/llama_stack/distributions/dell/build.yaml @@ -26,7 +26,6 @@ distribution_spec: tool_runtime: - provider_type: remote::brave-search - provider_type: remote::tavily-search - - provider_type: inline::rag-runtime image_type: venv additional_pip_packages: - aiosqlite diff --git a/llama_stack/distributions/dell/dell.py b/llama_stack/distributions/dell/dell.py index 88e72688f..708ba0b10 100644 --- a/llama_stack/distributions/dell/dell.py +++ b/llama_stack/distributions/dell/dell.py @@ -45,7 +45,6 @@ def get_distribution_template() -> DistributionTemplate: "tool_runtime": [ BuildProvider(provider_type="remote::brave-search"), BuildProvider(provider_type="remote::tavily-search"), - BuildProvider(provider_type="inline::rag-runtime"), ], } name = "dell" @@ -98,10 +97,6 @@ def get_distribution_template() -> DistributionTemplate: toolgroup_id="builtin::websearch", provider_id="brave-search", ), - ToolGroupInput( - toolgroup_id="builtin::rag", - provider_id="rag-runtime", - ), ] return DistributionTemplate( diff --git a/llama_stack/distributions/dell/doc_template.md b/llama_stack/distributions/dell/doc_template.md index 852e78d0e..4e28673e8 100644 --- a/llama_stack/distributions/dell/doc_template.md +++ b/llama_stack/distributions/dell/doc_template.md @@ -157,7 +157,7 @@ docker run \ Make sure you have done `pip install llama-stack` and have the Llama Stack CLI available. 
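The `vector_stores` block that now closes the ci-tests `run.yaml` above is what feeds the router's default-selection path: when a create request omits an embedding model, `VectorIORouter` composes the identifier as `{provider_id}/{model_id}`, and `validate_vector_stores_config` then requires that model to exist with an integer `embedding_dimension` in its metadata. A small self-contained recreation of that lookup, with a hypothetical `models_by_id` mapping standing in for the models API:

```python
# Illustrative only; the real logic lives in VectorIORouter.openai_create_vector_store
# and validate_vector_stores_config in stack.py.
def resolve_default_embedding_model(
    provider_id: str, model_id: str, models_by_id: dict[str, dict]
) -> tuple[str, int]:
    default_model_id = f"{provider_id}/{model_id}"
    model = models_by_id.get(default_model_id)
    if model is None:
        raise ValueError(f"Embedding model '{default_model_id}' not found")
    dimension = model["metadata"].get("embedding_dimension")
    if dimension is None:
        raise ValueError(f"Embedding model '{default_model_id}' is missing 'embedding_dimension' in metadata")
    return default_model_id, int(dimension)


# With the ci-tests defaults this resolves to a 768-dimensional model:
print(resolve_default_embedding_model(
    "sentence-transformers",
    "nomic-ai/nomic-embed-text-v1.5",
    {"sentence-transformers/nomic-ai/nomic-embed-text-v1.5": {"metadata": {"embedding_dimension": 768}}},
))
```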
```bash -llama stack build --distro {{ name }} --image-type conda +llama stack list-deps {{ name }} | xargs -L1 pip install INFERENCE_MODEL=$INFERENCE_MODEL \ DEH_URL=$DEH_URL \ CHROMA_URL=$CHROMA_URL \ diff --git a/llama_stack/distributions/dell/run-with-safety.yaml b/llama_stack/distributions/dell/run-with-safety.yaml index 5da3cf511..062c50e2b 100644 --- a/llama_stack/distributions/dell/run-with-safety.yaml +++ b/llama_stack/distributions/dell/run-with-safety.yaml @@ -26,9 +26,9 @@ providers: provider_type: remote::chromadb config: url: ${env.CHROMADB_URL:=} - kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell/}/chroma_remote_registry.db + persistence: + namespace: vector_io::chroma_remote + backend: kv_default safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -38,32 +38,35 @@ providers: - provider_id: meta-reference provider_type: inline::meta-reference config: - persistence_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/agents_store.db - responses_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/responses_store.db + persistence: + agent_state: + namespace: agents + backend: kv_default + responses: + table_name: responses + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 eval: - provider_id: meta-reference provider_type: inline::meta-reference config: kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/meta_reference_eval.db + namespace: eval + backend: kv_default datasetio: - provider_id: huggingface provider_type: remote::huggingface config: kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/huggingface_datasetio.db + namespace: datasetio::huggingface + backend: kv_default - provider_id: localfs provider_type: inline::localfs config: kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/localfs_datasetio.db + namespace: datasetio::localfs + backend: kv_default scoring: - provider_id: basic provider_type: inline::basic @@ -84,42 +87,50 @@ providers: config: api_key: ${env.TAVILY_SEARCH_API_KEY:=} max_results: 3 - - provider_id: rag-runtime - provider_type: inline::rag-runtime -metadata_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/registry.db -inference_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/inference_store.db -conversations_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/conversations.db -models: -- metadata: {} - model_id: ${env.INFERENCE_MODEL} - provider_id: tgi0 - model_type: llm -- metadata: {} - model_id: ${env.SAFETY_MODEL} - provider_id: tgi1 - model_type: llm -- metadata: - embedding_dimension: 768 - model_id: nomic-embed-text-v1.5 - provider_id: sentence-transformers - model_type: embedding -shields: -- shield_id: ${env.SAFETY_MODEL} -vector_dbs: [] -datasets: [] -scoring_fns: [] -benchmarks: [] -tool_groups: -- toolgroup_id: builtin::websearch - provider_id: brave-search -- toolgroup_id: builtin::rag - provider_id: rag-runtime +storage: + backends: + kv_default: + type: kv_sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/kvstore.db + sql_default: + type: sql_sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/sql_store.db + stores: + metadata: + namespace: registry + backend: kv_default + inference: + table_name: inference_store + backend: sql_default + 
max_write_queue_size: 10000 + num_writers: 4 + conversations: + table_name: openai_conversations + backend: sql_default +registered_resources: + models: + - metadata: {} + model_id: ${env.INFERENCE_MODEL} + provider_id: tgi0 + model_type: llm + - metadata: {} + model_id: ${env.SAFETY_MODEL} + provider_id: tgi1 + model_type: llm + - metadata: + embedding_dimension: 768 + model_id: nomic-embed-text-v1.5 + provider_id: sentence-transformers + model_type: embedding + shields: + - shield_id: ${env.SAFETY_MODEL} + vector_dbs: [] + datasets: [] + scoring_fns: [] + benchmarks: [] + tool_groups: + - toolgroup_id: builtin::websearch + provider_id: brave-search server: port: 8321 telemetry: diff --git a/llama_stack/distributions/dell/run.yaml b/llama_stack/distributions/dell/run.yaml index ac0fdc0fa..42e0658bd 100644 --- a/llama_stack/distributions/dell/run.yaml +++ b/llama_stack/distributions/dell/run.yaml @@ -22,9 +22,9 @@ providers: provider_type: remote::chromadb config: url: ${env.CHROMADB_URL:=} - kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell/}/chroma_remote_registry.db + persistence: + namespace: vector_io::chroma_remote + backend: kv_default safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -34,32 +34,35 @@ providers: - provider_id: meta-reference provider_type: inline::meta-reference config: - persistence_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/agents_store.db - responses_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/responses_store.db + persistence: + agent_state: + namespace: agents + backend: kv_default + responses: + table_name: responses + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 eval: - provider_id: meta-reference provider_type: inline::meta-reference config: kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/meta_reference_eval.db + namespace: eval + backend: kv_default datasetio: - provider_id: huggingface provider_type: remote::huggingface config: kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/huggingface_datasetio.db + namespace: datasetio::huggingface + backend: kv_default - provider_id: localfs provider_type: inline::localfs config: kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/localfs_datasetio.db + namespace: datasetio::localfs + backend: kv_default scoring: - provider_id: basic provider_type: inline::basic @@ -80,37 +83,45 @@ providers: config: api_key: ${env.TAVILY_SEARCH_API_KEY:=} max_results: 3 - - provider_id: rag-runtime - provider_type: inline::rag-runtime -metadata_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/registry.db -inference_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/inference_store.db -conversations_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/conversations.db -models: -- metadata: {} - model_id: ${env.INFERENCE_MODEL} - provider_id: tgi0 - model_type: llm -- metadata: - embedding_dimension: 768 - model_id: nomic-embed-text-v1.5 - provider_id: sentence-transformers - model_type: embedding -shields: [] -vector_dbs: [] -datasets: [] -scoring_fns: [] -benchmarks: [] -tool_groups: -- toolgroup_id: builtin::websearch - provider_id: brave-search -- toolgroup_id: builtin::rag - provider_id: rag-runtime +storage: + backends: + kv_default: + type: 
kv_sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/kvstore.db + sql_default: + type: sql_sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/sql_store.db + stores: + metadata: + namespace: registry + backend: kv_default + inference: + table_name: inference_store + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 + conversations: + table_name: openai_conversations + backend: sql_default +registered_resources: + models: + - metadata: {} + model_id: ${env.INFERENCE_MODEL} + provider_id: tgi0 + model_type: llm + - metadata: + embedding_dimension: 768 + model_id: nomic-embed-text-v1.5 + provider_id: sentence-transformers + model_type: embedding + shields: [] + vector_dbs: [] + datasets: [] + scoring_fns: [] + benchmarks: [] + tool_groups: + - toolgroup_id: builtin::websearch + provider_id: brave-search server: port: 8321 telemetry: diff --git a/llama_stack/distributions/meta-reference-gpu/build.yaml b/llama_stack/distributions/meta-reference-gpu/build.yaml index 1513742a7..74da29bb8 100644 --- a/llama_stack/distributions/meta-reference-gpu/build.yaml +++ b/llama_stack/distributions/meta-reference-gpu/build.yaml @@ -24,7 +24,6 @@ distribution_spec: tool_runtime: - provider_type: remote::brave-search - provider_type: remote::tavily-search - - provider_type: inline::rag-runtime - provider_type: remote::model-context-protocol image_type: venv additional_pip_packages: diff --git a/llama_stack/distributions/meta-reference-gpu/meta_reference.py b/llama_stack/distributions/meta-reference-gpu/meta_reference.py index 4e4ddef33..aa66d43a0 100644 --- a/llama_stack/distributions/meta-reference-gpu/meta_reference.py +++ b/llama_stack/distributions/meta-reference-gpu/meta_reference.py @@ -47,7 +47,6 @@ def get_distribution_template() -> DistributionTemplate: "tool_runtime": [ BuildProvider(provider_type="remote::brave-search"), BuildProvider(provider_type="remote::tavily-search"), - BuildProvider(provider_type="inline::rag-runtime"), BuildProvider(provider_type="remote::model-context-protocol"), ], } @@ -92,10 +91,6 @@ def get_distribution_template() -> DistributionTemplate: toolgroup_id="builtin::websearch", provider_id="tavily-search", ), - ToolGroupInput( - toolgroup_id="builtin::rag", - provider_id="rag-runtime", - ), ] return DistributionTemplate( diff --git a/llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml b/llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml index 874c5050f..6e74201db 100644 --- a/llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +++ b/llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml @@ -37,9 +37,9 @@ providers: - provider_id: faiss provider_type: inline::faiss config: - kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/faiss_store.db + persistence: + namespace: vector_io::faiss + backend: kv_default safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -49,32 +49,35 @@ providers: - provider_id: meta-reference provider_type: inline::meta-reference config: - persistence_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/agents_store.db - responses_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/responses_store.db + persistence: + agent_state: + namespace: agents + backend: kv_default + responses: + table_name: responses + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 eval: 
- provider_id: meta-reference provider_type: inline::meta-reference config: kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/meta_reference_eval.db + namespace: eval + backend: kv_default datasetio: - provider_id: huggingface provider_type: remote::huggingface config: kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/huggingface_datasetio.db + namespace: datasetio::huggingface + backend: kv_default - provider_id: localfs provider_type: inline::localfs config: kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/localfs_datasetio.db + namespace: datasetio::localfs + backend: kv_default scoring: - provider_id: basic provider_type: inline::basic @@ -95,44 +98,52 @@ providers: config: api_key: ${env.TAVILY_SEARCH_API_KEY:=} max_results: 3 - - provider_id: rag-runtime - provider_type: inline::rag-runtime - provider_id: model-context-protocol provider_type: remote::model-context-protocol -metadata_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/registry.db -inference_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/inference_store.db -conversations_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/conversations.db -models: -- metadata: {} - model_id: ${env.INFERENCE_MODEL} - provider_id: meta-reference-inference - model_type: llm -- metadata: {} - model_id: ${env.SAFETY_MODEL} - provider_id: meta-reference-safety - model_type: llm -- metadata: - embedding_dimension: 768 - model_id: nomic-embed-text-v1.5 - provider_id: sentence-transformers - model_type: embedding -shields: -- shield_id: ${env.SAFETY_MODEL} -vector_dbs: [] -datasets: [] -scoring_fns: [] -benchmarks: [] -tool_groups: -- toolgroup_id: builtin::websearch - provider_id: tavily-search -- toolgroup_id: builtin::rag - provider_id: rag-runtime +storage: + backends: + kv_default: + type: kv_sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/kvstore.db + sql_default: + type: sql_sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/sql_store.db + stores: + metadata: + namespace: registry + backend: kv_default + inference: + table_name: inference_store + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 + conversations: + table_name: openai_conversations + backend: sql_default +registered_resources: + models: + - metadata: {} + model_id: ${env.INFERENCE_MODEL} + provider_id: meta-reference-inference + model_type: llm + - metadata: {} + model_id: ${env.SAFETY_MODEL} + provider_id: meta-reference-safety + model_type: llm + - metadata: + embedding_dimension: 768 + model_id: nomic-embed-text-v1.5 + provider_id: sentence-transformers + model_type: embedding + shields: + - shield_id: ${env.SAFETY_MODEL} + vector_dbs: [] + datasets: [] + scoring_fns: [] + benchmarks: [] + tool_groups: + - toolgroup_id: builtin::websearch + provider_id: tavily-search server: port: 8321 telemetry: diff --git a/llama_stack/distributions/meta-reference-gpu/run.yaml b/llama_stack/distributions/meta-reference-gpu/run.yaml index 50553d2c7..92934ca74 100644 --- a/llama_stack/distributions/meta-reference-gpu/run.yaml +++ b/llama_stack/distributions/meta-reference-gpu/run.yaml @@ -27,9 +27,9 @@ providers: - provider_id: faiss provider_type: inline::faiss config: - kvstore: - type: sqlite - 
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/faiss_store.db + persistence: + namespace: vector_io::faiss + backend: kv_default safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -39,32 +39,35 @@ providers: - provider_id: meta-reference provider_type: inline::meta-reference config: - persistence_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/agents_store.db - responses_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/responses_store.db + persistence: + agent_state: + namespace: agents + backend: kv_default + responses: + table_name: responses + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 eval: - provider_id: meta-reference provider_type: inline::meta-reference config: kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/meta_reference_eval.db + namespace: eval + backend: kv_default datasetio: - provider_id: huggingface provider_type: remote::huggingface config: kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/huggingface_datasetio.db + namespace: datasetio::huggingface + backend: kv_default - provider_id: localfs provider_type: inline::localfs config: kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/localfs_datasetio.db + namespace: datasetio::localfs + backend: kv_default scoring: - provider_id: basic provider_type: inline::basic @@ -85,39 +88,47 @@ providers: config: api_key: ${env.TAVILY_SEARCH_API_KEY:=} max_results: 3 - - provider_id: rag-runtime - provider_type: inline::rag-runtime - provider_id: model-context-protocol provider_type: remote::model-context-protocol -metadata_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/registry.db -inference_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/inference_store.db -conversations_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/conversations.db -models: -- metadata: {} - model_id: ${env.INFERENCE_MODEL} - provider_id: meta-reference-inference - model_type: llm -- metadata: - embedding_dimension: 768 - model_id: nomic-embed-text-v1.5 - provider_id: sentence-transformers - model_type: embedding -shields: [] -vector_dbs: [] -datasets: [] -scoring_fns: [] -benchmarks: [] -tool_groups: -- toolgroup_id: builtin::websearch - provider_id: tavily-search -- toolgroup_id: builtin::rag - provider_id: rag-runtime +storage: + backends: + kv_default: + type: kv_sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/kvstore.db + sql_default: + type: sql_sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/sql_store.db + stores: + metadata: + namespace: registry + backend: kv_default + inference: + table_name: inference_store + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 + conversations: + table_name: openai_conversations + backend: sql_default +registered_resources: + models: + - metadata: {} + model_id: ${env.INFERENCE_MODEL} + provider_id: meta-reference-inference + model_type: llm + - metadata: + embedding_dimension: 768 + model_id: nomic-embed-text-v1.5 + provider_id: sentence-transformers + model_type: embedding + shields: [] + vector_dbs: [] + datasets: [] + scoring_fns: [] + 
benchmarks: [] + tool_groups: + - toolgroup_id: builtin::websearch + provider_id: tavily-search server: port: 8321 telemetry: diff --git a/llama_stack/distributions/nvidia/build.yaml b/llama_stack/distributions/nvidia/build.yaml index 8ddd12439..3412ea15b 100644 --- a/llama_stack/distributions/nvidia/build.yaml +++ b/llama_stack/distributions/nvidia/build.yaml @@ -19,8 +19,7 @@ distribution_spec: - provider_type: remote::nvidia scoring: - provider_type: inline::basic - tool_runtime: - - provider_type: inline::rag-runtime + tool_runtime: [] files: - provider_type: inline::localfs image_type: venv diff --git a/llama_stack/distributions/nvidia/nvidia.py b/llama_stack/distributions/nvidia/nvidia.py index a92a2e6f8..889f83aa5 100644 --- a/llama_stack/distributions/nvidia/nvidia.py +++ b/llama_stack/distributions/nvidia/nvidia.py @@ -28,7 +28,7 @@ def get_distribution_template(name: str = "nvidia") -> DistributionTemplate: BuildProvider(provider_type="remote::nvidia"), ], "scoring": [BuildProvider(provider_type="inline::basic")], - "tool_runtime": [BuildProvider(provider_type="inline::rag-runtime")], + "tool_runtime": [], "files": [BuildProvider(provider_type="inline::localfs")], } @@ -66,12 +66,7 @@ def get_distribution_template(name: str = "nvidia") -> DistributionTemplate: provider_id="nvidia", ) - default_tool_groups = [ - ToolGroupInput( - toolgroup_id="builtin::rag", - provider_id="rag-runtime", - ), - ] + default_tool_groups: list[ToolGroupInput] = [] return DistributionTemplate( name=name, diff --git a/llama_stack/distributions/nvidia/run-with-safety.yaml b/llama_stack/distributions/nvidia/run-with-safety.yaml index e0482f67d..dca29ed2a 100644 --- a/llama_stack/distributions/nvidia/run-with-safety.yaml +++ b/llama_stack/distributions/nvidia/run-with-safety.yaml @@ -28,9 +28,9 @@ providers: - provider_id: faiss provider_type: inline::faiss config: - kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/faiss_store.db + persistence: + namespace: vector_io::faiss + backend: kv_default safety: - provider_id: nvidia provider_type: remote::nvidia @@ -41,12 +41,15 @@ providers: - provider_id: meta-reference provider_type: inline::meta-reference config: - persistence_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/agents_store.db - responses_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/responses_store.db + persistence: + agent_state: + namespace: agents + backend: kv_default + responses: + table_name: responses + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 eval: - provider_id: nvidia provider_type: remote::nvidia @@ -65,8 +68,8 @@ providers: provider_type: inline::localfs config: kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/localfs_datasetio.db + namespace: datasetio::localfs + backend: kv_default - provider_id: nvidia provider_type: remote::nvidia config: @@ -77,45 +80,53 @@ providers: scoring: - provider_id: basic provider_type: inline::basic - tool_runtime: - - provider_id: rag-runtime - provider_type: inline::rag-runtime + tool_runtime: [] files: - provider_id: meta-reference-files provider_type: inline::localfs config: storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/nvidia/files} metadata_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/files_metadata.db -metadata_store: - type: sqlite - db_path: 
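
Provider-level persistence follows the same pattern: instead of each provider owning its own sqlite file, its config now holds a KVStoreReference, that is, a backend name plus a namespace that keeps its keys isolated from other providers sharing the backend. A sketch under those assumptions:

from llama_stack.core.storage.datatypes import KVStoreReference

# Each provider gets a namespace inside the shared kv_default backend,
# replacing the per-provider faiss_store.db / meta_reference_eval.db files.
faiss_persistence = KVStoreReference(backend="kv_default", namespace="vector_io::faiss")
eval_kvstore = KVStoreReference(backend="kv_default", namespace="eval")

print(faiss_persistence.model_dump(exclude_none=True))
# expected roughly: {'backend': 'kv_default', 'namespace': 'vector_io::faiss'}
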
${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/registry.db -inference_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/inference_store.db -conversations_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/conversations.db -models: -- metadata: {} - model_id: ${env.INFERENCE_MODEL} - provider_id: nvidia - model_type: llm -- metadata: {} - model_id: ${env.SAFETY_MODEL} - provider_id: nvidia - model_type: llm -shields: -- shield_id: ${env.SAFETY_MODEL} - provider_id: nvidia -vector_dbs: [] -datasets: [] -scoring_fns: [] -benchmarks: [] -tool_groups: -- toolgroup_id: builtin::rag - provider_id: rag-runtime + table_name: files_metadata + backend: sql_default +storage: + backends: + kv_default: + type: kv_sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/kvstore.db + sql_default: + type: sql_sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/sql_store.db + stores: + metadata: + namespace: registry + backend: kv_default + inference: + table_name: inference_store + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 + conversations: + table_name: openai_conversations + backend: sql_default +registered_resources: + models: + - metadata: {} + model_id: ${env.INFERENCE_MODEL} + provider_id: nvidia + model_type: llm + - metadata: {} + model_id: ${env.SAFETY_MODEL} + provider_id: nvidia + model_type: llm + shields: + - shield_id: ${env.SAFETY_MODEL} + provider_id: nvidia + vector_dbs: [] + datasets: [] + scoring_fns: [] + benchmarks: [] + tool_groups: [] server: port: 8321 telemetry: diff --git a/llama_stack/distributions/nvidia/run.yaml b/llama_stack/distributions/nvidia/run.yaml index 950782eed..e35d9c44c 100644 --- a/llama_stack/distributions/nvidia/run.yaml +++ b/llama_stack/distributions/nvidia/run.yaml @@ -23,9 +23,9 @@ providers: - provider_id: faiss provider_type: inline::faiss config: - kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/faiss_store.db + persistence: + namespace: vector_io::faiss + backend: kv_default safety: - provider_id: nvidia provider_type: remote::nvidia @@ -36,12 +36,15 @@ providers: - provider_id: meta-reference provider_type: inline::meta-reference config: - persistence_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/agents_store.db - responses_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/responses_store.db + persistence: + agent_state: + namespace: agents + backend: kv_default + responses: + table_name: responses + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 eval: - provider_id: nvidia provider_type: remote::nvidia @@ -66,35 +69,43 @@ providers: scoring: - provider_id: basic provider_type: inline::basic - tool_runtime: - - provider_id: rag-runtime - provider_type: inline::rag-runtime + tool_runtime: [] files: - provider_id: meta-reference-files provider_type: inline::localfs config: storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/nvidia/files} metadata_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/files_metadata.db -metadata_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/registry.db -inference_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/inference_store.db -conversations_store: - type: sqlite - db_path: 
${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/conversations.db -models: [] -shields: [] -vector_dbs: [] -datasets: [] -scoring_fns: [] -benchmarks: [] -tool_groups: -- toolgroup_id: builtin::rag - provider_id: rag-runtime + table_name: files_metadata + backend: sql_default +storage: + backends: + kv_default: + type: kv_sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/kvstore.db + sql_default: + type: sql_sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/sql_store.db + stores: + metadata: + namespace: registry + backend: kv_default + inference: + table_name: inference_store + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 + conversations: + table_name: openai_conversations + backend: sql_default +registered_resources: + models: [] + shields: [] + vector_dbs: [] + datasets: [] + scoring_fns: [] + benchmarks: [] + tool_groups: [] server: port: 8321 telemetry: diff --git a/llama_stack/distributions/open-benchmark/build.yaml b/llama_stack/distributions/open-benchmark/build.yaml index 05acd98e3..9fc0e9eb0 100644 --- a/llama_stack/distributions/open-benchmark/build.yaml +++ b/llama_stack/distributions/open-benchmark/build.yaml @@ -28,7 +28,6 @@ distribution_spec: tool_runtime: - provider_type: remote::brave-search - provider_type: remote::tavily-search - - provider_type: inline::rag-runtime - provider_type: remote::model-context-protocol image_type: venv additional_pip_packages: diff --git a/llama_stack/distributions/open-benchmark/open_benchmark.py b/llama_stack/distributions/open-benchmark/open_benchmark.py index 2b7760894..cceec74fd 100644 --- a/llama_stack/distributions/open-benchmark/open_benchmark.py +++ b/llama_stack/distributions/open-benchmark/open_benchmark.py @@ -118,7 +118,6 @@ def get_distribution_template() -> DistributionTemplate: "tool_runtime": [ BuildProvider(provider_type="remote::brave-search"), BuildProvider(provider_type="remote::tavily-search"), - BuildProvider(provider_type="inline::rag-runtime"), BuildProvider(provider_type="remote::model-context-protocol"), ], } @@ -154,10 +153,6 @@ def get_distribution_template() -> DistributionTemplate: toolgroup_id="builtin::websearch", provider_id="tavily-search", ), - ToolGroupInput( - toolgroup_id="builtin::rag", - provider_id="rag-runtime", - ), ] models, _ = get_model_registry(available_models) diff --git a/llama_stack/distributions/open-benchmark/run.yaml b/llama_stack/distributions/open-benchmark/run.yaml index a738887b4..8f63e4417 100644 --- a/llama_stack/distributions/open-benchmark/run.yaml +++ b/llama_stack/distributions/open-benchmark/run.yaml @@ -39,16 +39,16 @@ providers: provider_type: inline::sqlite-vec config: db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/sqlite_vec.db - kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/sqlite_vec_registry.db + persistence: + namespace: vector_io::sqlite_vec + backend: kv_default - provider_id: ${env.ENABLE_CHROMADB:+chromadb} provider_type: remote::chromadb config: url: ${env.CHROMADB_URL:=} - kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/chroma_remote_registry.db + persistence: + namespace: vector_io::chroma_remote + backend: kv_default - provider_id: ${env.ENABLE_PGVECTOR:+pgvector} provider_type: remote::pgvector config: @@ -57,9 +57,9 @@ providers: db: ${env.PGVECTOR_DB:=} user: ${env.PGVECTOR_USER:=} password: ${env.PGVECTOR_PASSWORD:=} - kvstore: - type: sqlite - db_path: 
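
The agents provider is the one config that needs both flavors: a KV namespace for agent state and a SQL table for responses. Based on the AgentPersistenceConfig added later in this patch, construction looks roughly like:

from llama_stack.core.storage.datatypes import KVStoreReference, ResponsesStoreReference
from llama_stack.providers.inline.agents.meta_reference.config import (
    AgentPersistenceConfig,
    MetaReferenceAgentsImplConfig,
)

config = MetaReferenceAgentsImplConfig(
    persistence=AgentPersistenceConfig(
        agent_state=KVStoreReference(backend="kv_default", namespace="agents"),
        responses=ResponsesStoreReference(backend="sql_default", table_name="responses"),
    )
)
# The impl reads these as config.persistence.agent_state and
# config.persistence.responses (see the agents.py change below).
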
${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/pgvector_registry.db + persistence: + namespace: vector_io::pgvector + backend: kv_default safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -69,32 +69,35 @@ providers: - provider_id: meta-reference provider_type: inline::meta-reference config: - persistence_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/agents_store.db - responses_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/responses_store.db + persistence: + agent_state: + namespace: agents + backend: kv_default + responses: + table_name: responses + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 eval: - provider_id: meta-reference provider_type: inline::meta-reference config: kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/meta_reference_eval.db + namespace: eval + backend: kv_default datasetio: - provider_id: huggingface provider_type: remote::huggingface config: kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/huggingface_datasetio.db + namespace: datasetio::huggingface + backend: kv_default - provider_id: localfs provider_type: inline::localfs config: kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/localfs_datasetio.db + namespace: datasetio::localfs + backend: kv_default scoring: - provider_id: basic provider_type: inline::basic @@ -115,122 +118,130 @@ providers: config: api_key: ${env.TAVILY_SEARCH_API_KEY:=} max_results: 3 - - provider_id: rag-runtime - provider_type: inline::rag-runtime - provider_id: model-context-protocol provider_type: remote::model-context-protocol -metadata_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/registry.db -inference_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/inference_store.db -conversations_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/conversations.db -models: -- metadata: {} - model_id: gpt-4o - provider_id: openai - provider_model_id: gpt-4o - model_type: llm -- metadata: {} - model_id: claude-3-5-sonnet-latest - provider_id: anthropic - provider_model_id: claude-3-5-sonnet-latest - model_type: llm -- metadata: {} - model_id: gemini/gemini-1.5-flash - provider_id: gemini - provider_model_id: gemini/gemini-1.5-flash - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3.3-70B-Instruct - provider_id: groq - provider_model_id: groq/llama-3.3-70b-versatile - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3.1-405B-Instruct - provider_id: together - provider_model_id: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo - model_type: llm -shields: -- shield_id: meta-llama/Llama-Guard-3-8B -vector_dbs: [] -datasets: -- purpose: eval/messages-answer - source: - type: uri - uri: huggingface://datasets/llamastack/simpleqa?split=train - metadata: {} - dataset_id: simpleqa -- purpose: eval/messages-answer - source: - type: uri - uri: huggingface://datasets/llamastack/mmlu_cot?split=test&name=all - metadata: {} - dataset_id: mmlu_cot -- purpose: eval/messages-answer - source: - type: uri - uri: huggingface://datasets/llamastack/gpqa_0shot_cot?split=test&name=gpqa_main - metadata: {} - dataset_id: gpqa_cot -- purpose: eval/messages-answer - source: - type: uri - uri: 
huggingface://datasets/llamastack/math_500?split=test - metadata: {} - dataset_id: math_500 -- purpose: eval/messages-answer - source: - type: uri - uri: huggingface://datasets/llamastack/IfEval?split=train - metadata: {} - dataset_id: ifeval -- purpose: eval/messages-answer - source: - type: uri - uri: huggingface://datasets/llamastack/docvqa?split=val - metadata: {} - dataset_id: docvqa -scoring_fns: [] -benchmarks: -- dataset_id: simpleqa - scoring_functions: - - llm-as-judge::405b-simpleqa - metadata: {} - benchmark_id: meta-reference-simpleqa -- dataset_id: mmlu_cot - scoring_functions: - - basic::regex_parser_multiple_choice_answer - metadata: {} - benchmark_id: meta-reference-mmlu-cot -- dataset_id: gpqa_cot - scoring_functions: - - basic::regex_parser_multiple_choice_answer - metadata: {} - benchmark_id: meta-reference-gpqa-cot -- dataset_id: math_500 - scoring_functions: - - basic::regex_parser_math_response - metadata: {} - benchmark_id: meta-reference-math-500 -- dataset_id: ifeval - scoring_functions: - - basic::ifeval - metadata: {} - benchmark_id: meta-reference-ifeval -- dataset_id: docvqa - scoring_functions: - - basic::docvqa - metadata: {} - benchmark_id: meta-reference-docvqa -tool_groups: -- toolgroup_id: builtin::websearch - provider_id: tavily-search -- toolgroup_id: builtin::rag - provider_id: rag-runtime +storage: + backends: + kv_default: + type: kv_sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/kvstore.db + sql_default: + type: sql_sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/sql_store.db + stores: + metadata: + namespace: registry + backend: kv_default + inference: + table_name: inference_store + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 + conversations: + table_name: openai_conversations + backend: sql_default +registered_resources: + models: + - metadata: {} + model_id: gpt-4o + provider_id: openai + provider_model_id: gpt-4o + model_type: llm + - metadata: {} + model_id: claude-3-5-sonnet-latest + provider_id: anthropic + provider_model_id: claude-3-5-sonnet-latest + model_type: llm + - metadata: {} + model_id: gemini/gemini-1.5-flash + provider_id: gemini + provider_model_id: gemini/gemini-1.5-flash + model_type: llm + - metadata: {} + model_id: meta-llama/Llama-3.3-70B-Instruct + provider_id: groq + provider_model_id: groq/llama-3.3-70b-versatile + model_type: llm + - metadata: {} + model_id: meta-llama/Llama-3.1-405B-Instruct + provider_id: together + provider_model_id: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo + model_type: llm + shields: + - shield_id: meta-llama/Llama-Guard-3-8B + vector_dbs: [] + datasets: + - purpose: eval/messages-answer + source: + type: uri + uri: huggingface://datasets/llamastack/simpleqa?split=train + metadata: {} + dataset_id: simpleqa + - purpose: eval/messages-answer + source: + type: uri + uri: huggingface://datasets/llamastack/mmlu_cot?split=test&name=all + metadata: {} + dataset_id: mmlu_cot + - purpose: eval/messages-answer + source: + type: uri + uri: huggingface://datasets/llamastack/gpqa_0shot_cot?split=test&name=gpqa_main + metadata: {} + dataset_id: gpqa_cot + - purpose: eval/messages-answer + source: + type: uri + uri: huggingface://datasets/llamastack/math_500?split=test + metadata: {} + dataset_id: math_500 + - purpose: eval/messages-answer + source: + type: uri + uri: huggingface://datasets/llamastack/IfEval?split=train + metadata: {} + dataset_id: ifeval + - purpose: eval/messages-answer + source: + type: uri + uri: 
huggingface://datasets/llamastack/docvqa?split=val + metadata: {} + dataset_id: docvqa + scoring_fns: [] + benchmarks: + - dataset_id: simpleqa + scoring_functions: + - llm-as-judge::405b-simpleqa + metadata: {} + benchmark_id: meta-reference-simpleqa + - dataset_id: mmlu_cot + scoring_functions: + - basic::regex_parser_multiple_choice_answer + metadata: {} + benchmark_id: meta-reference-mmlu-cot + - dataset_id: gpqa_cot + scoring_functions: + - basic::regex_parser_multiple_choice_answer + metadata: {} + benchmark_id: meta-reference-gpqa-cot + - dataset_id: math_500 + scoring_functions: + - basic::regex_parser_math_response + metadata: {} + benchmark_id: meta-reference-math-500 + - dataset_id: ifeval + scoring_functions: + - basic::ifeval + metadata: {} + benchmark_id: meta-reference-ifeval + - dataset_id: docvqa + scoring_functions: + - basic::docvqa + metadata: {} + benchmark_id: meta-reference-docvqa + tool_groups: + - toolgroup_id: builtin::websearch + provider_id: tavily-search server: port: 8321 telemetry: diff --git a/llama_stack/distributions/postgres-demo/build.yaml b/llama_stack/distributions/postgres-demo/build.yaml index 063dc3999..99b4edeb3 100644 --- a/llama_stack/distributions/postgres-demo/build.yaml +++ b/llama_stack/distributions/postgres-demo/build.yaml @@ -14,7 +14,6 @@ distribution_spec: tool_runtime: - provider_type: remote::brave-search - provider_type: remote::tavily-search - - provider_type: inline::rag-runtime - provider_type: remote::model-context-protocol image_type: venv additional_pip_packages: diff --git a/llama_stack/distributions/postgres-demo/postgres_demo.py b/llama_stack/distributions/postgres-demo/postgres_demo.py index 1f3e88b3b..9f8d35cb1 100644 --- a/llama_stack/distributions/postgres-demo/postgres_demo.py +++ b/llama_stack/distributions/postgres-demo/postgres_demo.py @@ -45,7 +45,6 @@ def get_distribution_template() -> DistributionTemplate: "tool_runtime": [ BuildProvider(provider_type="remote::brave-search"), BuildProvider(provider_type="remote::tavily-search"), - BuildProvider(provider_type="inline::rag-runtime"), BuildProvider(provider_type="remote::model-context-protocol"), ], } @@ -66,10 +65,6 @@ def get_distribution_template() -> DistributionTemplate: toolgroup_id="builtin::websearch", provider_id="tavily-search", ), - ToolGroupInput( - toolgroup_id="builtin::rag", - provider_id="rag-runtime", - ), ] default_models = [ @@ -91,7 +86,6 @@ def get_distribution_template() -> DistributionTemplate: "embedding_dimension": 768, }, ) - postgres_config = PostgresSqlStoreConfig.sample_run_config() return DistributionTemplate( name=name, distro_type="self_hosted", @@ -105,22 +99,16 @@ def get_distribution_template() -> DistributionTemplate: provider_overrides={ "inference": inference_providers + [embedding_provider], "vector_io": vector_io_providers, - "agents": [ - Provider( - provider_id="meta-reference", - provider_type="inline::meta-reference", - config=dict( - persistence_store=postgres_config, - responses_store=postgres_config, - ), - ) - ], }, default_models=default_models + [embedding_model], default_tool_groups=default_tool_groups, default_shields=[ShieldInput(shield_id="meta-llama/Llama-Guard-3-8B")], - metadata_store=PostgresKVStoreConfig.sample_run_config(), - inference_store=postgres_config, + storage_backends={ + "kv_default": PostgresKVStoreConfig.sample_run_config( + table_name="llamastack_kvstore", + ), + "sql_default": PostgresSqlStoreConfig.sample_run_config(), + }, ), }, run_config_env_vars={ diff --git 
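
The postgres_demo.py change above shows the payoff of the indirection: switching a distribution to Postgres means overriding the two backends, while every store and provider namespace reference stays untouched. A sketch, assuming PostgresKVStoreConfig lives in llama_stack.providers.utils.kvstore.config alongside the other KV configs imported elsewhere in this patch:

from llama_stack.providers.utils.kvstore.config import PostgresKVStoreConfig
from llama_stack.providers.utils.sqlstore.sqlstore import PostgresSqlStoreConfig

storage_backends = {
    "kv_default": PostgresKVStoreConfig.sample_run_config(table_name="llamastack_kvstore"),
    "sql_default": PostgresSqlStoreConfig.sample_run_config(),
}
# Stores such as metadata/inference/conversations keep referencing
# kv_default / sql_default and pick up Postgres automatically.
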
a/llama_stack/distributions/postgres-demo/run.yaml b/llama_stack/distributions/postgres-demo/run.yaml index 62faf3f62..67222969c 100644 --- a/llama_stack/distributions/postgres-demo/run.yaml +++ b/llama_stack/distributions/postgres-demo/run.yaml @@ -22,9 +22,9 @@ providers: provider_type: remote::chromadb config: url: ${env.CHROMADB_URL:=} - kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/postgres-demo}/chroma_remote_registry.db + persistence: + namespace: vector_io::chroma_remote + backend: kv_default safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -34,20 +34,15 @@ providers: - provider_id: meta-reference provider_type: inline::meta-reference config: - persistence_store: - type: postgres - host: ${env.POSTGRES_HOST:=localhost} - port: ${env.POSTGRES_PORT:=5432} - db: ${env.POSTGRES_DB:=llamastack} - user: ${env.POSTGRES_USER:=llamastack} - password: ${env.POSTGRES_PASSWORD:=llamastack} - responses_store: - type: postgres - host: ${env.POSTGRES_HOST:=localhost} - port: ${env.POSTGRES_PORT:=5432} - db: ${env.POSTGRES_DB:=llamastack} - user: ${env.POSTGRES_USER:=llamastack} - password: ${env.POSTGRES_PASSWORD:=llamastack} + persistence: + agent_state: + namespace: agents + backend: kv_default + responses: + table_name: responses + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 tool_runtime: - provider_id: brave-search provider_type: remote::brave-search @@ -59,49 +54,57 @@ providers: config: api_key: ${env.TAVILY_SEARCH_API_KEY:=} max_results: 3 - - provider_id: rag-runtime - provider_type: inline::rag-runtime - provider_id: model-context-protocol provider_type: remote::model-context-protocol -metadata_store: - type: postgres - host: ${env.POSTGRES_HOST:=localhost} - port: ${env.POSTGRES_PORT:=5432} - db: ${env.POSTGRES_DB:=llamastack} - user: ${env.POSTGRES_USER:=llamastack} - password: ${env.POSTGRES_PASSWORD:=llamastack} - table_name: ${env.POSTGRES_TABLE_NAME:=llamastack_kvstore} -inference_store: - type: postgres - host: ${env.POSTGRES_HOST:=localhost} - port: ${env.POSTGRES_PORT:=5432} - db: ${env.POSTGRES_DB:=llamastack} - user: ${env.POSTGRES_USER:=llamastack} - password: ${env.POSTGRES_PASSWORD:=llamastack} -conversations_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/postgres-demo}/conversations.db -models: -- metadata: {} - model_id: ${env.INFERENCE_MODEL} - provider_id: vllm-inference - model_type: llm -- metadata: - embedding_dimension: 768 - model_id: nomic-embed-text-v1.5 - provider_id: sentence-transformers - model_type: embedding -shields: -- shield_id: meta-llama/Llama-Guard-3-8B -vector_dbs: [] -datasets: [] -scoring_fns: [] -benchmarks: [] -tool_groups: -- toolgroup_id: builtin::websearch - provider_id: tavily-search -- toolgroup_id: builtin::rag - provider_id: rag-runtime +storage: + backends: + kv_default: + type: kv_postgres + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} + table_name: ${env.POSTGRES_TABLE_NAME:=llamastack_kvstore} + sql_default: + type: sql_postgres + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} + stores: + metadata: + namespace: registry + backend: kv_default + inference: + table_name: inference_store + backend: sql_default + 
max_write_queue_size: 10000 + num_writers: 4 + conversations: + table_name: openai_conversations + backend: sql_default +registered_resources: + models: + - metadata: {} + model_id: ${env.INFERENCE_MODEL} + provider_id: vllm-inference + model_type: llm + - metadata: + embedding_dimension: 768 + model_id: nomic-embed-text-v1.5 + provider_id: sentence-transformers + model_type: embedding + shields: + - shield_id: meta-llama/Llama-Guard-3-8B + vector_dbs: [] + datasets: [] + scoring_fns: [] + benchmarks: [] + tool_groups: + - toolgroup_id: builtin::websearch + provider_id: tavily-search server: port: 8321 telemetry: diff --git a/llama_stack/distributions/starter-gpu/build.yaml b/llama_stack/distributions/starter-gpu/build.yaml index 943c6134d..678d7995d 100644 --- a/llama_stack/distributions/starter-gpu/build.yaml +++ b/llama_stack/distributions/starter-gpu/build.yaml @@ -26,6 +26,8 @@ distribution_spec: - provider_type: inline::milvus - provider_type: remote::chromadb - provider_type: remote::pgvector + - provider_type: remote::qdrant + - provider_type: remote::weaviate files: - provider_type: inline::localfs safety: @@ -47,7 +49,6 @@ distribution_spec: tool_runtime: - provider_type: remote::brave-search - provider_type: remote::tavily-search - - provider_type: inline::rag-runtime - provider_type: remote::model-context-protocol batches: - provider_type: inline::reference diff --git a/llama_stack/distributions/starter-gpu/run.yaml b/llama_stack/distributions/starter-gpu/run.yaml index 370d4b516..4764dc02c 100644 --- a/llama_stack/distributions/starter-gpu/run.yaml +++ b/llama_stack/distributions/starter-gpu/run.yaml @@ -93,30 +93,30 @@ providers: - provider_id: faiss provider_type: inline::faiss config: - kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/faiss_store.db + persistence: + namespace: vector_io::faiss + backend: kv_default - provider_id: sqlite-vec provider_type: inline::sqlite-vec config: db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/sqlite_vec.db - kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/sqlite_vec_registry.db + persistence: + namespace: vector_io::sqlite_vec + backend: kv_default - provider_id: ${env.MILVUS_URL:+milvus} provider_type: inline::milvus config: db_path: ${env.MILVUS_DB_PATH:=~/.llama/distributions/starter-gpu}/milvus.db - kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/milvus_registry.db + persistence: + namespace: vector_io::milvus + backend: kv_default - provider_id: ${env.CHROMADB_URL:+chromadb} provider_type: remote::chromadb config: url: ${env.CHROMADB_URL:=} - kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu/}/chroma_remote_registry.db + persistence: + namespace: vector_io::chroma_remote + backend: kv_default - provider_id: ${env.PGVECTOR_DB:+pgvector} provider_type: remote::pgvector config: @@ -125,17 +125,32 @@ providers: db: ${env.PGVECTOR_DB:=} user: ${env.PGVECTOR_USER:=} password: ${env.PGVECTOR_PASSWORD:=} - kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/pgvector_registry.db + persistence: + namespace: vector_io::pgvector + backend: kv_default + - provider_id: ${env.QDRANT_URL:+qdrant} + provider_type: remote::qdrant + config: + api_key: ${env.QDRANT_API_KEY:=} + persistence: + namespace: vector_io::qdrant_remote + backend: kv_default + - provider_id: ${env.WEAVIATE_CLUSTER_URL:+weaviate} + 
provider_type: remote::weaviate + config: + weaviate_api_key: null + weaviate_cluster_url: ${env.WEAVIATE_CLUSTER_URL:=localhost:8080} + persistence: + namespace: vector_io::weaviate + backend: kv_default files: - provider_id: meta-reference-files provider_type: inline::localfs config: storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/starter-gpu/files} metadata_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/files_metadata.db + table_name: files_metadata + backend: sql_default safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -147,12 +162,15 @@ providers: - provider_id: meta-reference provider_type: inline::meta-reference config: - persistence_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/agents_store.db - responses_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/responses_store.db + persistence: + agent_state: + namespace: agents + backend: kv_default + responses: + table_name: responses + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 post_training: - provider_id: huggingface-gpu provider_type: inline::huggingface-gpu @@ -166,21 +184,21 @@ providers: provider_type: inline::meta-reference config: kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/meta_reference_eval.db + namespace: eval + backend: kv_default datasetio: - provider_id: huggingface provider_type: remote::huggingface config: kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/huggingface_datasetio.db + namespace: datasetio::huggingface + backend: kv_default - provider_id: localfs provider_type: inline::localfs config: kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/localfs_datasetio.db + namespace: datasetio::localfs + backend: kv_default scoring: - provider_id: basic provider_type: inline::basic @@ -201,8 +219,6 @@ providers: config: api_key: ${env.TAVILY_SEARCH_API_KEY:=} max_results: 3 - - provider_id: rag-runtime - provider_type: inline::rag-runtime - provider_id: model-context-protocol provider_type: remote::model-context-protocol batches: @@ -210,35 +226,50 @@ providers: provider_type: inline::reference config: kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/batches.db -metadata_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/registry.db -inference_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/inference_store.db -conversations_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/conversations.db -models: [] -shields: -- shield_id: llama-guard - provider_id: ${env.SAFETY_MODEL:+llama-guard} - provider_shield_id: ${env.SAFETY_MODEL:=} -- shield_id: code-scanner - provider_id: ${env.CODE_SCANNER_MODEL:+code-scanner} - provider_shield_id: ${env.CODE_SCANNER_MODEL:=} -vector_dbs: [] -datasets: [] -scoring_fns: [] -benchmarks: [] -tool_groups: -- toolgroup_id: builtin::websearch - provider_id: tavily-search -- toolgroup_id: builtin::rag - provider_id: rag-runtime + namespace: batches + backend: kv_default +storage: + backends: + kv_default: + type: kv_sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/kvstore.db + sql_default: + type: sql_sqlite + db_path: 
${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/sql_store.db + stores: + metadata: + namespace: registry + backend: kv_default + inference: + table_name: inference_store + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 + conversations: + table_name: openai_conversations + backend: sql_default +registered_resources: + models: [] + shields: + - shield_id: llama-guard + provider_id: ${env.SAFETY_MODEL:+llama-guard} + provider_shield_id: ${env.SAFETY_MODEL:=} + - shield_id: code-scanner + provider_id: ${env.CODE_SCANNER_MODEL:+code-scanner} + provider_shield_id: ${env.CODE_SCANNER_MODEL:=} + vector_dbs: [] + datasets: [] + scoring_fns: [] + benchmarks: [] + tool_groups: + - toolgroup_id: builtin::websearch + provider_id: tavily-search server: port: 8321 telemetry: enabled: true +vector_stores: + default_provider_id: faiss + default_embedding_model: + provider_id: sentence-transformers + model_id: nomic-ai/nomic-embed-text-v1.5 diff --git a/llama_stack/distributions/starter/build.yaml b/llama_stack/distributions/starter/build.yaml index c2719d50d..e6cd3c688 100644 --- a/llama_stack/distributions/starter/build.yaml +++ b/llama_stack/distributions/starter/build.yaml @@ -26,6 +26,8 @@ distribution_spec: - provider_type: inline::milvus - provider_type: remote::chromadb - provider_type: remote::pgvector + - provider_type: remote::qdrant + - provider_type: remote::weaviate files: - provider_type: inline::localfs safety: @@ -47,7 +49,6 @@ distribution_spec: tool_runtime: - provider_type: remote::brave-search - provider_type: remote::tavily-search - - provider_type: inline::rag-runtime - provider_type: remote::model-context-protocol batches: - provider_type: inline::reference diff --git a/llama_stack/distributions/starter/run.yaml b/llama_stack/distributions/starter/run.yaml index 2f4e7f350..88358501e 100644 --- a/llama_stack/distributions/starter/run.yaml +++ b/llama_stack/distributions/starter/run.yaml @@ -93,30 +93,30 @@ providers: - provider_id: faiss provider_type: inline::faiss config: - kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/faiss_store.db + persistence: + namespace: vector_io::faiss + backend: kv_default - provider_id: sqlite-vec provider_type: inline::sqlite-vec config: db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/sqlite_vec.db - kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/sqlite_vec_registry.db + persistence: + namespace: vector_io::sqlite_vec + backend: kv_default - provider_id: ${env.MILVUS_URL:+milvus} provider_type: inline::milvus config: db_path: ${env.MILVUS_DB_PATH:=~/.llama/distributions/starter}/milvus.db - kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/milvus_registry.db + persistence: + namespace: vector_io::milvus + backend: kv_default - provider_id: ${env.CHROMADB_URL:+chromadb} provider_type: remote::chromadb config: url: ${env.CHROMADB_URL:=} - kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter/}/chroma_remote_registry.db + persistence: + namespace: vector_io::chroma_remote + backend: kv_default - provider_id: ${env.PGVECTOR_DB:+pgvector} provider_type: remote::pgvector config: @@ -125,17 +125,32 @@ providers: db: ${env.PGVECTOR_DB:=} user: ${env.PGVECTOR_USER:=} password: ${env.PGVECTOR_PASSWORD:=} - kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/pgvector_registry.db + persistence: + namespace: 
vector_io::pgvector + backend: kv_default + - provider_id: ${env.QDRANT_URL:+qdrant} + provider_type: remote::qdrant + config: + api_key: ${env.QDRANT_API_KEY:=} + persistence: + namespace: vector_io::qdrant_remote + backend: kv_default + - provider_id: ${env.WEAVIATE_CLUSTER_URL:+weaviate} + provider_type: remote::weaviate + config: + weaviate_api_key: null + weaviate_cluster_url: ${env.WEAVIATE_CLUSTER_URL:=localhost:8080} + persistence: + namespace: vector_io::weaviate + backend: kv_default files: - provider_id: meta-reference-files provider_type: inline::localfs config: storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/starter/files} metadata_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/files_metadata.db + table_name: files_metadata + backend: sql_default safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -147,12 +162,15 @@ providers: - provider_id: meta-reference provider_type: inline::meta-reference config: - persistence_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/agents_store.db - responses_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/responses_store.db + persistence: + agent_state: + namespace: agents + backend: kv_default + responses: + table_name: responses + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 post_training: - provider_id: torchtune-cpu provider_type: inline::torchtune-cpu @@ -163,21 +181,21 @@ providers: provider_type: inline::meta-reference config: kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/meta_reference_eval.db + namespace: eval + backend: kv_default datasetio: - provider_id: huggingface provider_type: remote::huggingface config: kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/huggingface_datasetio.db + namespace: datasetio::huggingface + backend: kv_default - provider_id: localfs provider_type: inline::localfs config: kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/localfs_datasetio.db + namespace: datasetio::localfs + backend: kv_default scoring: - provider_id: basic provider_type: inline::basic @@ -198,8 +216,6 @@ providers: config: api_key: ${env.TAVILY_SEARCH_API_KEY:=} max_results: 3 - - provider_id: rag-runtime - provider_type: inline::rag-runtime - provider_id: model-context-protocol provider_type: remote::model-context-protocol batches: @@ -207,35 +223,50 @@ providers: provider_type: inline::reference config: kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/batches.db -metadata_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/registry.db -inference_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/inference_store.db -conversations_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/conversations.db -models: [] -shields: -- shield_id: llama-guard - provider_id: ${env.SAFETY_MODEL:+llama-guard} - provider_shield_id: ${env.SAFETY_MODEL:=} -- shield_id: code-scanner - provider_id: ${env.CODE_SCANNER_MODEL:+code-scanner} - provider_shield_id: ${env.CODE_SCANNER_MODEL:=} -vector_dbs: [] -datasets: [] -scoring_fns: [] -benchmarks: [] -tool_groups: -- toolgroup_id: builtin::websearch - provider_id: tavily-search -- toolgroup_id: builtin::rag - provider_id: rag-runtime + namespace: batches + 
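
The batches provider illustrates how small these provider configs become once the backend details move out. A sketch using the ReferenceBatchesImplConfig shape defined near the end of this patch:

from llama_stack.core.storage.datatypes import KVStoreReference
from llama_stack.providers.inline.batches.reference.config import ReferenceBatchesImplConfig

batches_config = ReferenceBatchesImplConfig(
    kvstore=KVStoreReference(backend="kv_default", namespace="batches"),
)
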
backend: kv_default +storage: + backends: + kv_default: + type: kv_sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/kvstore.db + sql_default: + type: sql_sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/sql_store.db + stores: + metadata: + namespace: registry + backend: kv_default + inference: + table_name: inference_store + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 + conversations: + table_name: openai_conversations + backend: sql_default +registered_resources: + models: [] + shields: + - shield_id: llama-guard + provider_id: ${env.SAFETY_MODEL:+llama-guard} + provider_shield_id: ${env.SAFETY_MODEL:=} + - shield_id: code-scanner + provider_id: ${env.CODE_SCANNER_MODEL:+code-scanner} + provider_shield_id: ${env.CODE_SCANNER_MODEL:=} + vector_dbs: [] + datasets: [] + scoring_fns: [] + benchmarks: [] + tool_groups: + - toolgroup_id: builtin::websearch + provider_id: tavily-search server: port: 8321 telemetry: enabled: true +vector_stores: + default_provider_id: faiss + default_embedding_model: + provider_id: sentence-transformers + model_id: nomic-ai/nomic-embed-text-v1.5 diff --git a/llama_stack/distributions/starter/starter.py b/llama_stack/distributions/starter/starter.py index f87ebcc5f..bad6279bd 100644 --- a/llama_stack/distributions/starter/starter.py +++ b/llama_stack/distributions/starter/starter.py @@ -11,8 +11,10 @@ from llama_stack.core.datatypes import ( BuildProvider, Provider, ProviderSpec, + QualifiedModel, ShieldInput, ToolGroupInput, + VectorStoresConfig, ) from llama_stack.core.utils.dynamic import instantiate_class_type from llama_stack.distributions.template import DistributionTemplate, RunConfigSettings @@ -31,6 +33,8 @@ from llama_stack.providers.remote.vector_io.chroma.config import ChromaVectorIOC from llama_stack.providers.remote.vector_io.pgvector.config import ( PGVectorVectorIOConfig, ) +from llama_stack.providers.remote.vector_io.qdrant.config import QdrantVectorIOConfig +from llama_stack.providers.remote.vector_io.weaviate.config import WeaviateVectorIOConfig from llama_stack.providers.utils.sqlstore.sqlstore import PostgresSqlStoreConfig @@ -113,6 +117,8 @@ def get_distribution_template(name: str = "starter") -> DistributionTemplate: BuildProvider(provider_type="inline::milvus"), BuildProvider(provider_type="remote::chromadb"), BuildProvider(provider_type="remote::pgvector"), + BuildProvider(provider_type="remote::qdrant"), + BuildProvider(provider_type="remote::weaviate"), ], "files": [BuildProvider(provider_type="inline::localfs")], "safety": [ @@ -134,7 +140,6 @@ def get_distribution_template(name: str = "starter") -> DistributionTemplate: "tool_runtime": [ BuildProvider(provider_type="remote::brave-search"), BuildProvider(provider_type="remote::tavily-search"), - BuildProvider(provider_type="inline::rag-runtime"), BuildProvider(provider_type="remote::model-context-protocol"), ], "batches": [ @@ -156,10 +161,6 @@ def get_distribution_template(name: str = "starter") -> DistributionTemplate: toolgroup_id="builtin::websearch", provider_id="tavily-search", ), - ToolGroupInput( - toolgroup_id="builtin::rag", - provider_id="rag-runtime", - ), ] default_shields = [ # if the @@ -221,12 +222,35 @@ def get_distribution_template(name: str = "starter") -> DistributionTemplate: password="${env.PGVECTOR_PASSWORD:=}", ), ), + Provider( + provider_id="${env.QDRANT_URL:+qdrant}", + provider_type="remote::qdrant", + config=QdrantVectorIOConfig.sample_run_config( + f"~/.llama/distributions/{name}", + 
url="${env.QDRANT_URL:=}", + ), + ), + Provider( + provider_id="${env.WEAVIATE_CLUSTER_URL:+weaviate}", + provider_type="remote::weaviate", + config=WeaviateVectorIOConfig.sample_run_config( + f"~/.llama/distributions/{name}", + cluster_url="${env.WEAVIATE_CLUSTER_URL:=}", + ), + ), ], "files": [files_provider], }, default_models=[], default_tool_groups=default_tool_groups, default_shields=default_shields, + vector_stores_config=VectorStoresConfig( + default_provider_id="faiss", + default_embedding_model=QualifiedModel( + provider_id="sentence-transformers", + model_id="nomic-ai/nomic-embed-text-v1.5", + ), + ), ), }, run_config_env_vars={ diff --git a/llama_stack/distributions/template.py b/llama_stack/distributions/template.py index 807829999..64f21e626 100644 --- a/llama_stack/distributions/template.py +++ b/llama_stack/distributions/template.py @@ -27,8 +27,15 @@ from llama_stack.core.datatypes import ( ShieldInput, TelemetryConfig, ToolGroupInput, + VectorStoresConfig, ) from llama_stack.core.distribution import get_provider_registry +from llama_stack.core.storage.datatypes import ( + InferenceStoreReference, + KVStoreReference, + SqlStoreReference, + StorageBackendType, +) from llama_stack.core.utils.dynamic import instantiate_class_type from llama_stack.core.utils.image_types import LlamaStackImageType from llama_stack.providers.utils.inference.model_registry import ProviderModelEntry @@ -180,10 +187,10 @@ class RunConfigSettings(BaseModel): default_tool_groups: list[ToolGroupInput] | None = None default_datasets: list[DatasetInput] | None = None default_benchmarks: list[BenchmarkInput] | None = None - metadata_store: dict | None = None - inference_store: dict | None = None - conversations_store: dict | None = None + vector_stores_config: VectorStoresConfig | None = None telemetry: TelemetryConfig = Field(default_factory=lambda: TelemetryConfig(enabled=True)) + storage_backends: dict[str, Any] | None = None + storage_stores: dict[str, Any] | None = None def run_config( self, @@ -226,41 +233,65 @@ class RunConfigSettings(BaseModel): # Get unique set of APIs from providers apis = sorted(providers.keys()) + storage_backends = self.storage_backends or { + "kv_default": SqliteKVStoreConfig.sample_run_config( + __distro_dir__=f"~/.llama/distributions/{name}", + db_name="kvstore.db", + ), + "sql_default": SqliteSqlStoreConfig.sample_run_config( + __distro_dir__=f"~/.llama/distributions/{name}", + db_name="sql_store.db", + ), + } + + storage_stores = self.storage_stores or { + "metadata": KVStoreReference( + backend="kv_default", + namespace="registry", + ).model_dump(exclude_none=True), + "inference": InferenceStoreReference( + backend="sql_default", + table_name="inference_store", + ).model_dump(exclude_none=True), + "conversations": SqlStoreReference( + backend="sql_default", + table_name="openai_conversations", + ).model_dump(exclude_none=True), + } + + storage_config = dict( + backends=storage_backends, + stores=storage_stores, + ) + # Return a dict that matches StackRunConfig structure - return { + config = { "version": LLAMA_STACK_RUN_CONFIG_VERSION, "image_name": name, "container_image": container_image, "apis": apis, "providers": provider_configs, - "metadata_store": self.metadata_store - or SqliteKVStoreConfig.sample_run_config( - __distro_dir__=f"~/.llama/distributions/{name}", - db_name="registry.db", - ), - "inference_store": self.inference_store - or SqliteSqlStoreConfig.sample_run_config( - __distro_dir__=f"~/.llama/distributions/{name}", - db_name="inference_store.db", - ), 
- "conversations_store": self.conversations_store - or SqliteSqlStoreConfig.sample_run_config( - __distro_dir__=f"~/.llama/distributions/{name}", - db_name="conversations.db", - ), - "models": [m.model_dump(exclude_none=True) for m in (self.default_models or [])], - "shields": [s.model_dump(exclude_none=True) for s in (self.default_shields or [])], - "vector_dbs": [], - "datasets": [d.model_dump(exclude_none=True) for d in (self.default_datasets or [])], - "scoring_fns": [], - "benchmarks": [b.model_dump(exclude_none=True) for b in (self.default_benchmarks or [])], - "tool_groups": [t.model_dump(exclude_none=True) for t in (self.default_tool_groups or [])], + "storage": storage_config, + "registered_resources": { + "models": [m.model_dump(exclude_none=True) for m in (self.default_models or [])], + "shields": [s.model_dump(exclude_none=True) for s in (self.default_shields or [])], + "vector_dbs": [], + "datasets": [d.model_dump(exclude_none=True) for d in (self.default_datasets or [])], + "scoring_fns": [], + "benchmarks": [b.model_dump(exclude_none=True) for b in (self.default_benchmarks or [])], + "tool_groups": [t.model_dump(exclude_none=True) for t in (self.default_tool_groups or [])], + }, "server": { "port": 8321, }, "telemetry": self.telemetry.model_dump(exclude_none=True) if self.telemetry else None, } + if self.vector_stores_config: + config["vector_stores"] = self.vector_stores_config.model_dump(exclude_none=True) + + return config + class DistributionTemplate(BaseModel): """ @@ -297,11 +328,15 @@ class DistributionTemplate(BaseModel): # We should have a better way to do this by formalizing the concept of "internal" APIs # and providers, with a way to specify dependencies for them. - if run_config_.get("inference_store"): - additional_pip_packages.extend(get_sql_pip_packages(run_config_["inference_store"])) - - if run_config_.get("metadata_store"): - additional_pip_packages.extend(get_kv_pip_packages(run_config_["metadata_store"])) + storage_cfg = run_config_.get("storage", {}) + for backend_cfg in storage_cfg.get("backends", {}).values(): + store_type = backend_cfg.get("type") + if not store_type: + continue + if str(store_type).startswith("kv_"): + additional_pip_packages.extend(get_kv_pip_packages(backend_cfg)) + elif str(store_type).startswith("sql_"): + additional_pip_packages.extend(get_sql_pip_packages(backend_cfg)) if self.additional_pip_packages: additional_pip_packages.extend(self.additional_pip_packages) @@ -387,11 +422,13 @@ class DistributionTemplate(BaseModel): def enum_representer(dumper, data): return dumper.represent_scalar("tag:yaml.org,2002:str", data.value) - # Register YAML representer for ModelType + # Register YAML representer for enums yaml.add_representer(ModelType, enum_representer) yaml.add_representer(DatasetPurpose, enum_representer) + yaml.add_representer(StorageBackendType, enum_representer) yaml.SafeDumper.add_representer(ModelType, enum_representer) yaml.SafeDumper.add_representer(DatasetPurpose, enum_representer) + yaml.SafeDumper.add_representer(StorageBackendType, enum_representer) for output_dir in [yaml_output_dir, doc_output_dir]: output_dir.mkdir(parents=True, exist_ok=True) diff --git a/llama_stack/distributions/watsonx/build.yaml b/llama_stack/distributions/watsonx/build.yaml index dba1a94e2..d2c396085 100644 --- a/llama_stack/distributions/watsonx/build.yaml +++ b/llama_stack/distributions/watsonx/build.yaml @@ -23,7 +23,6 @@ distribution_spec: tool_runtime: - provider_type: remote::brave-search - provider_type: remote::tavily-search - - 
provider_type: inline::rag-runtime - provider_type: remote::model-context-protocol files: - provider_type: inline::localfs diff --git a/llama_stack/distributions/watsonx/run.yaml b/llama_stack/distributions/watsonx/run.yaml index c3db4eeb8..ddc7e095f 100644 --- a/llama_stack/distributions/watsonx/run.yaml +++ b/llama_stack/distributions/watsonx/run.yaml @@ -22,9 +22,9 @@ providers: - provider_id: faiss provider_type: inline::faiss config: - kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/faiss_store.db + persistence: + namespace: vector_io::faiss + backend: kv_default safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -34,32 +34,35 @@ providers: - provider_id: meta-reference provider_type: inline::meta-reference config: - persistence_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/agents_store.db - responses_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/responses_store.db + persistence: + agent_state: + namespace: agents + backend: kv_default + responses: + table_name: responses + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 eval: - provider_id: meta-reference provider_type: inline::meta-reference config: kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/meta_reference_eval.db + namespace: eval + backend: kv_default datasetio: - provider_id: huggingface provider_type: remote::huggingface config: kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/huggingface_datasetio.db + namespace: datasetio::huggingface + backend: kv_default - provider_id: localfs provider_type: inline::localfs config: kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/localfs_datasetio.db + namespace: datasetio::localfs + backend: kv_default scoring: - provider_id: basic provider_type: inline::basic @@ -80,8 +83,6 @@ providers: config: api_key: ${env.TAVILY_SEARCH_API_KEY:=} max_results: 3 - - provider_id: rag-runtime - provider_type: inline::rag-runtime - provider_id: model-context-protocol provider_type: remote::model-context-protocol files: @@ -90,28 +91,38 @@ providers: config: storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/watsonx/files} metadata_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/files_metadata.db -metadata_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/registry.db -inference_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/inference_store.db -conversations_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/conversations.db -models: [] -shields: [] -vector_dbs: [] -datasets: [] -scoring_fns: [] -benchmarks: [] -tool_groups: -- toolgroup_id: builtin::websearch - provider_id: tavily-search -- toolgroup_id: builtin::rag - provider_id: rag-runtime + table_name: files_metadata + backend: sql_default +storage: + backends: + kv_default: + type: kv_sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/kvstore.db + sql_default: + type: sql_sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/sql_store.db + stores: + metadata: + namespace: registry + backend: kv_default + inference: + table_name: inference_store + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 + conversations: + table_name: 
openai_conversations + backend: sql_default +registered_resources: + models: [] + shields: [] + vector_dbs: [] + datasets: [] + scoring_fns: [] + benchmarks: [] + tool_groups: + - toolgroup_id: builtin::websearch + provider_id: tavily-search server: port: 8321 telemetry: diff --git a/llama_stack/distributions/watsonx/watsonx.py b/llama_stack/distributions/watsonx/watsonx.py index d79aea872..b16f76fcb 100644 --- a/llama_stack/distributions/watsonx/watsonx.py +++ b/llama_stack/distributions/watsonx/watsonx.py @@ -33,7 +33,6 @@ def get_distribution_template(name: str = "watsonx") -> DistributionTemplate: "tool_runtime": [ BuildProvider(provider_type="remote::brave-search"), BuildProvider(provider_type="remote::tavily-search"), - BuildProvider(provider_type="inline::rag-runtime"), BuildProvider(provider_type="remote::model-context-protocol"), ], "files": [BuildProvider(provider_type="inline::localfs")], @@ -50,10 +49,6 @@ def get_distribution_template(name: str = "watsonx") -> DistributionTemplate: toolgroup_id="builtin::websearch", provider_id="tavily-search", ), - ToolGroupInput( - toolgroup_id="builtin::rag", - provider_id="rag-runtime", - ), ] files_provider = Provider( diff --git a/llama_stack/providers/datatypes.py b/llama_stack/providers/datatypes.py index c8ff9cecb..9be3edb8e 100644 --- a/llama_stack/providers/datatypes.py +++ b/llama_stack/providers/datatypes.py @@ -17,7 +17,7 @@ from llama_stack.apis.models import Model from llama_stack.apis.scoring_functions import ScoringFn from llama_stack.apis.shields import Shield from llama_stack.apis.tools import ToolGroup -from llama_stack.apis.vector_dbs import VectorDB +from llama_stack.apis.vector_stores import VectorStore from llama_stack.schema_utils import json_schema_type @@ -68,10 +68,10 @@ class ShieldsProtocolPrivate(Protocol): async def unregister_shield(self, identifier: str) -> None: ... -class VectorDBsProtocolPrivate(Protocol): - async def register_vector_db(self, vector_db: VectorDB) -> None: ... +class VectorStoresProtocolPrivate(Protocol): + async def register_vector_store(self, vector_store: VectorStore) -> None: ... - async def unregister_vector_db(self, vector_db_id: str) -> None: ... + async def unregister_vector_store(self, vector_store_id: str) -> None: ... 
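
For provider authors, the datatypes.py rename above changes the private protocol surface: implementations now register VectorStore objects instead of VectorDB. A hypothetical adapter (class name and in-memory storage invented for illustration) would satisfy the new protocol like this:

from llama_stack.apis.vector_stores import VectorStore
from llama_stack.providers.datatypes import VectorStoresProtocolPrivate


class InMemoryVectorIOAdapter(VectorStoresProtocolPrivate):
    """Hypothetical adapter: tracks registered stores in a dict."""

    def __init__(self) -> None:
        self.stores: dict[str, VectorStore] = {}

    async def register_vector_store(self, vector_store: VectorStore) -> None:
        # assumes VectorStore keeps the resource identifier field VectorDB had
        self.stores[vector_store.identifier] = vector_store

    async def unregister_vector_store(self, vector_store_id: str) -> None:
        self.stores.pop(vector_store_id, None)
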
class DatasetsProtocolPrivate(Protocol): diff --git a/llama_stack/providers/inline/agents/meta_reference/agents.py b/llama_stack/providers/inline/agents/meta_reference/agents.py index 810c063e6..c2f6ea640 100644 --- a/llama_stack/providers/inline/agents/meta_reference/agents.py +++ b/llama_stack/providers/inline/agents/meta_reference/agents.py @@ -83,8 +83,8 @@ class MetaReferenceAgentsImpl(Agents): self.policy = policy async def initialize(self) -> None: - self.persistence_store = await kvstore_impl(self.config.persistence_store) - self.responses_store = ResponsesStore(self.config.responses_store, self.policy) + self.persistence_store = await kvstore_impl(self.config.persistence.agent_state) + self.responses_store = ResponsesStore(self.config.persistence.responses, self.policy) await self.responses_store.initialize() self.openai_responses_impl = OpenAIResponsesImpl( inference_api=self.inference_api, diff --git a/llama_stack/providers/inline/agents/meta_reference/config.py b/llama_stack/providers/inline/agents/meta_reference/config.py index 1c392f29c..a800b426b 100644 --- a/llama_stack/providers/inline/agents/meta_reference/config.py +++ b/llama_stack/providers/inline/agents/meta_reference/config.py @@ -8,24 +8,30 @@ from typing import Any from pydantic import BaseModel -from llama_stack.providers.utils.kvstore import KVStoreConfig -from llama_stack.providers.utils.kvstore.config import SqliteKVStoreConfig -from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig, SqlStoreConfig +from llama_stack.core.storage.datatypes import KVStoreReference, ResponsesStoreReference + + +class AgentPersistenceConfig(BaseModel): + """Nested persistence configuration for agents.""" + + agent_state: KVStoreReference + responses: ResponsesStoreReference class MetaReferenceAgentsImplConfig(BaseModel): - persistence_store: KVStoreConfig - responses_store: SqlStoreConfig + persistence: AgentPersistenceConfig @classmethod def sample_run_config(cls, __distro_dir__: str) -> dict[str, Any]: return { - "persistence_store": SqliteKVStoreConfig.sample_run_config( - __distro_dir__=__distro_dir__, - db_name="agents_store.db", - ), - "responses_store": SqliteSqlStoreConfig.sample_run_config( - __distro_dir__=__distro_dir__, - db_name="responses_store.db", - ), + "persistence": { + "agent_state": KVStoreReference( + backend="kv_default", + namespace="agents", + ).model_dump(exclude_none=True), + "responses": ResponsesStoreReference( + backend="sql_default", + table_name="responses", + ).model_dump(exclude_none=True), + } } diff --git a/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py b/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py index 851e6ef28..2360dafd9 100644 --- a/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +++ b/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py @@ -359,6 +359,7 @@ class OpenAIResponsesImpl: tool_executor=self.tool_executor, safety_api=self.safety_api, guardrail_ids=guardrail_ids, + instructions=instructions, ) # Stream the response diff --git a/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py b/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py index caf899cdd..e80ffcdd1 100644 --- a/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +++ b/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py @@ -110,6 +110,7 @@ class StreamingResponseOrchestrator: text: 
OpenAIResponseText, max_infer_iters: int, tool_executor, # Will be the tool execution logic from the main class + instructions: str, safety_api, guardrail_ids: list[str] | None = None, ): @@ -133,6 +134,8 @@ class StreamingResponseOrchestrator: self.accumulated_usage: OpenAIResponseUsage | None = None # Track if we've sent a refusal response self.violation_detected = False + # system message that is inserted into the model's context + self.instructions = instructions async def _create_refusal_response(self, violation_message: str) -> OpenAIResponseObjectStream: """Create a refusal response to replace streaming content.""" @@ -176,6 +179,7 @@ class StreamingResponseOrchestrator: tools=self.ctx.available_tools(), error=error, usage=self.accumulated_usage, + instructions=self.instructions, ) async def create_response(self) -> AsyncIterator[OpenAIResponseObjectStream]: diff --git a/llama_stack/providers/inline/batches/reference/config.py b/llama_stack/providers/inline/batches/reference/config.py index d8d06868b..f896a897d 100644 --- a/llama_stack/providers/inline/batches/reference/config.py +++ b/llama_stack/providers/inline/batches/reference/config.py @@ -6,13 +6,13 @@ from pydantic import BaseModel, Field -from llama_stack.providers.utils.kvstore.config import KVStoreConfig, SqliteKVStoreConfig +from llama_stack.core.storage.datatypes import KVStoreReference class ReferenceBatchesImplConfig(BaseModel): """Configuration for the Reference Batches implementation.""" - kvstore: KVStoreConfig = Field( + kvstore: KVStoreReference = Field( description="Configuration for the key-value store backend.", ) @@ -33,8 +33,8 @@ class ReferenceBatchesImplConfig(BaseModel): @classmethod def sample_run_config(cls, __distro_dir__: str) -> dict: return { - "kvstore": SqliteKVStoreConfig.sample_run_config( - __distro_dir__=__distro_dir__, - db_name="batches.db", - ), + "kvstore": KVStoreReference( + backend="kv_default", + namespace="batches", + ).model_dump(exclude_none=True), } diff --git a/llama_stack/providers/inline/datasetio/localfs/config.py b/llama_stack/providers/inline/datasetio/localfs/config.py index b450e8777..6e878df62 100644 --- a/llama_stack/providers/inline/datasetio/localfs/config.py +++ b/llama_stack/providers/inline/datasetio/localfs/config.py @@ -7,20 +7,17 @@ from typing import Any from pydantic import BaseModel -from llama_stack.providers.utils.kvstore.config import ( - KVStoreConfig, - SqliteKVStoreConfig, -) +from llama_stack.core.storage.datatypes import KVStoreReference class LocalFSDatasetIOConfig(BaseModel): - kvstore: KVStoreConfig + kvstore: KVStoreReference @classmethod def sample_run_config(cls, __distro_dir__: str, **kwargs: Any) -> dict[str, Any]: return { - "kvstore": SqliteKVStoreConfig.sample_run_config( - __distro_dir__=__distro_dir__, - db_name="localfs_datasetio.db", - ) + "kvstore": KVStoreReference( + backend="kv_default", + namespace="datasetio::localfs", + ).model_dump(exclude_none=True) } diff --git a/llama_stack/providers/inline/eval/meta_reference/config.py b/llama_stack/providers/inline/eval/meta_reference/config.py index 2a4a29998..b496c855e 100644 --- a/llama_stack/providers/inline/eval/meta_reference/config.py +++ b/llama_stack/providers/inline/eval/meta_reference/config.py @@ -7,20 +7,17 @@ from typing import Any from pydantic import BaseModel -from llama_stack.providers.utils.kvstore.config import ( - KVStoreConfig, - SqliteKVStoreConfig, -) +from llama_stack.core.storage.datatypes import KVStoreReference class MetaReferenceEvalConfig(BaseModel): - kvstore: 
KVStoreConfig + kvstore: KVStoreReference @classmethod def sample_run_config(cls, __distro_dir__: str, **kwargs: Any) -> dict[str, Any]: return { - "kvstore": SqliteKVStoreConfig.sample_run_config( - __distro_dir__=__distro_dir__, - db_name="meta_reference_eval.db", - ) + "kvstore": KVStoreReference( + backend="kv_default", + namespace="eval", + ).model_dump(exclude_none=True) } diff --git a/llama_stack/providers/inline/files/localfs/config.py b/llama_stack/providers/inline/files/localfs/config.py index 6c767af8f..0c2dd3b21 100644 --- a/llama_stack/providers/inline/files/localfs/config.py +++ b/llama_stack/providers/inline/files/localfs/config.py @@ -8,14 +8,14 @@ from typing import Any from pydantic import BaseModel, Field -from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig, SqlStoreConfig +from llama_stack.core.storage.datatypes import SqlStoreReference class LocalfsFilesImplConfig(BaseModel): storage_dir: str = Field( description="Directory to store uploaded files", ) - metadata_store: SqlStoreConfig = Field( + metadata_store: SqlStoreReference = Field( description="SQL store configuration for file metadata", ) ttl_secs: int = 365 * 24 * 60 * 60 # 1 year @@ -24,8 +24,8 @@ class LocalfsFilesImplConfig(BaseModel): def sample_run_config(cls, __distro_dir__: str) -> dict[str, Any]: return { "storage_dir": "${env.FILES_STORAGE_DIR:=" + __distro_dir__ + "/files}", - "metadata_store": SqliteSqlStoreConfig.sample_run_config( - __distro_dir__=__distro_dir__, - db_name="files_metadata.db", - ), + "metadata_store": SqlStoreReference( + backend="sql_default", + table_name="files_metadata", + ).model_dump(exclude_none=True), } diff --git a/llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py b/llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py index 871adcb24..cb72aa13a 100644 --- a/llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py +++ b/llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py @@ -59,7 +59,6 @@ class SentenceTransformersInferenceImpl( provider_id=self.__provider_id__, metadata={ "embedding_dimension": 768, - "default_configured": True, }, model_type=ModelType.embedding, ), diff --git a/llama_stack/providers/inline/telemetry/meta_reference/console_span_processor.py b/llama_stack/providers/inline/telemetry/meta_reference/console_span_processor.py deleted file mode 100644 index 78e49af94..000000000 --- a/llama_stack/providers/inline/telemetry/meta_reference/console_span_processor.py +++ /dev/null @@ -1,75 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
- -import json -from datetime import UTC, datetime - -from opentelemetry.sdk.trace import ReadableSpan -from opentelemetry.sdk.trace.export import SpanProcessor -from opentelemetry.trace.status import StatusCode - -from llama_stack.log import get_logger - -logger = get_logger(name="console_span_processor", category="telemetry") - - -class ConsoleSpanProcessor(SpanProcessor): - def __init__(self, print_attributes: bool = False): - self.print_attributes = print_attributes - - def on_start(self, span: ReadableSpan, parent_context=None) -> None: - if span.attributes and span.attributes.get("__autotraced__"): - return - - timestamp = datetime.fromtimestamp(span.start_time / 1e9, tz=UTC).strftime("%H:%M:%S.%f")[:-3] - logger.info(f"[dim]{timestamp}[/dim] [bold magenta][START][/bold magenta] [dim]{span.name}[/dim]") - - def on_end(self, span: ReadableSpan) -> None: - timestamp = datetime.fromtimestamp(span.end_time / 1e9, tz=UTC).strftime("%H:%M:%S.%f")[:-3] - span_context = f"[dim]{timestamp}[/dim] [bold magenta][END][/bold magenta] [dim]{span.name}[/dim]" - if span.status.status_code == StatusCode.ERROR: - span_context += " [bold red][ERROR][/bold red]" - elif span.status.status_code != StatusCode.UNSET: - span_context += f" [{span.status.status_code}]" - duration_ms = (span.end_time - span.start_time) / 1e6 - span_context += f" ({duration_ms:.2f}ms)" - logger.info(span_context) - - if self.print_attributes and span.attributes: - for key, value in span.attributes.items(): - if key.startswith("__"): - continue - str_value = str(value) - if len(str_value) > 1000: - str_value = str_value[:997] + "..." - logger.info(f" [dim]{key}[/dim]: {str_value}") - - for event in span.events: - event_time = datetime.fromtimestamp(event.timestamp / 1e9, tz=UTC).strftime("%H:%M:%S.%f")[:-3] - severity = event.attributes.get("severity", "info") - message = event.attributes.get("message", event.name) - if isinstance(message, dict) or isinstance(message, list): - message = json.dumps(message, indent=2) - severity_color = { - "error": "red", - "warn": "yellow", - "info": "white", - "debug": "dim", - }.get(severity, "white") - logger.info(f" {event_time} [bold {severity_color}][{severity.upper()}][/bold {severity_color}] {message}") - if event.attributes: - for key, value in event.attributes.items(): - if key.startswith("__") or key in ["message", "severity"]: - continue - logger.info(f"[dim]{key}[/dim]: {value}") - - def shutdown(self) -> None: - """Shutdown the processor.""" - pass - - def force_flush(self, timeout_millis: float | None = None) -> bool: - """Force flush any pending spans.""" - return True diff --git a/llama_stack/providers/inline/telemetry/meta_reference/telemetry.py b/llama_stack/providers/inline/telemetry/meta_reference/telemetry.py index 2a225476b..b15b1e490 100644 --- a/llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +++ b/llama_stack/providers/inline/telemetry/meta_reference/telemetry.py @@ -79,8 +79,10 @@ class TelemetryAdapter(Telemetry): metric_reader = PeriodicExportingMetricReader(OTLPMetricExporter()) metric_provider = MeterProvider(metric_readers=[metric_reader]) metrics.set_meter_provider(metric_provider) + self.is_otel_endpoint_set = True else: logger.warning("OTEL_EXPORTER_OTLP_ENDPOINT is not set, skipping telemetry") + self.is_otel_endpoint_set = False self.meter = metrics.get_meter(__name__) self._lock = _global_lock @@ -89,7 +91,8 @@ class TelemetryAdapter(Telemetry): pass async def shutdown(self) -> None: - trace.get_tracer_provider().force_flush() + if 
self.is_otel_endpoint_set: + trace.get_tracer_provider().force_flush() async def log_event(self, event: Event, ttl_seconds: int = 604800) -> None: if isinstance(event, UnstructuredLogEvent): diff --git a/llama_stack/providers/inline/tool_runtime/rag/__init__.py b/llama_stack/providers/inline/tool_runtime/rag/__init__.py deleted file mode 100644 index f9a7e7b89..000000000 --- a/llama_stack/providers/inline/tool_runtime/rag/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from typing import Any - -from llama_stack.providers.datatypes import Api - -from .config import RagToolRuntimeConfig - - -async def get_provider_impl(config: RagToolRuntimeConfig, deps: dict[Api, Any]): - from .memory import MemoryToolRuntimeImpl - - impl = MemoryToolRuntimeImpl(config, deps[Api.vector_io], deps[Api.inference], deps[Api.files]) - await impl.initialize() - return impl diff --git a/llama_stack/providers/inline/tool_runtime/rag/config.py b/llama_stack/providers/inline/tool_runtime/rag/config.py deleted file mode 100644 index 43ba78e65..000000000 --- a/llama_stack/providers/inline/tool_runtime/rag/config.py +++ /dev/null @@ -1,15 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from typing import Any - -from pydantic import BaseModel - - -class RagToolRuntimeConfig(BaseModel): - @classmethod - def sample_run_config(cls, __distro_dir__: str, **kwargs: Any) -> dict[str, Any]: - return {} diff --git a/llama_stack/providers/inline/tool_runtime/rag/context_retriever.py b/llama_stack/providers/inline/tool_runtime/rag/context_retriever.py deleted file mode 100644 index 14cbec49d..000000000 --- a/llama_stack/providers/inline/tool_runtime/rag/context_retriever.py +++ /dev/null @@ -1,77 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - - -from jinja2 import Template - -from llama_stack.apis.common.content_types import InterleavedContent -from llama_stack.apis.inference import OpenAIChatCompletionRequestWithExtraBody, OpenAIUserMessageParam -from llama_stack.apis.tools.rag_tool import ( - DefaultRAGQueryGeneratorConfig, - LLMRAGQueryGeneratorConfig, - RAGQueryGenerator, - RAGQueryGeneratorConfig, -) -from llama_stack.providers.utils.inference.prompt_adapter import ( - interleaved_content_as_str, -) - - -async def generate_rag_query( - config: RAGQueryGeneratorConfig, - content: InterleavedContent, - **kwargs, -): - """ - Generates a query that will be used for - retrieving relevant information from the memory bank. 
- """ - if config.type == RAGQueryGenerator.default.value: - query = await default_rag_query_generator(config, content, **kwargs) - elif config.type == RAGQueryGenerator.llm.value: - query = await llm_rag_query_generator(config, content, **kwargs) - else: - raise NotImplementedError(f"Unsupported memory query generator {config.type}") - return query - - -async def default_rag_query_generator( - config: DefaultRAGQueryGeneratorConfig, - content: InterleavedContent, - **kwargs, -): - return interleaved_content_as_str(content, sep=config.separator) - - -async def llm_rag_query_generator( - config: LLMRAGQueryGeneratorConfig, - content: InterleavedContent, - **kwargs, -): - assert "inference_api" in kwargs, "LLMRAGQueryGenerator needs inference_api" - inference_api = kwargs["inference_api"] - - messages = [] - if isinstance(content, list): - messages = [interleaved_content_as_str(m) for m in content] - else: - messages = [interleaved_content_as_str(content)] - - template = Template(config.template) - rendered_content: str = template.render({"messages": messages}) - - model = config.model - message = OpenAIUserMessageParam(content=rendered_content) - params = OpenAIChatCompletionRequestWithExtraBody( - model=model, - messages=[message], - stream=False, - ) - response = await inference_api.openai_chat_completion(params) - - query = response.choices[0].message.content - - return query diff --git a/llama_stack/providers/inline/tool_runtime/rag/memory.py b/llama_stack/providers/inline/tool_runtime/rag/memory.py deleted file mode 100644 index dc3dfbbca..000000000 --- a/llama_stack/providers/inline/tool_runtime/rag/memory.py +++ /dev/null @@ -1,332 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
- -import asyncio -import base64 -import io -import mimetypes -from typing import Any - -import httpx -from fastapi import UploadFile -from pydantic import TypeAdapter - -from llama_stack.apis.common.content_types import ( - URL, - InterleavedContent, - InterleavedContentItem, - TextContentItem, -) -from llama_stack.apis.files import Files, OpenAIFilePurpose -from llama_stack.apis.inference import Inference -from llama_stack.apis.tools import ( - ListToolDefsResponse, - RAGDocument, - RAGQueryConfig, - RAGQueryResult, - RAGToolRuntime, - ToolDef, - ToolGroup, - ToolInvocationResult, - ToolRuntime, -) -from llama_stack.apis.vector_io import ( - QueryChunksResponse, - VectorIO, - VectorStoreChunkingStrategyStatic, - VectorStoreChunkingStrategyStaticConfig, -) -from llama_stack.log import get_logger -from llama_stack.providers.datatypes import ToolGroupsProtocolPrivate -from llama_stack.providers.utils.inference.prompt_adapter import interleaved_content_as_str -from llama_stack.providers.utils.memory.vector_store import parse_data_url - -from .config import RagToolRuntimeConfig -from .context_retriever import generate_rag_query - -log = get_logger(name=__name__, category="tool_runtime") - - -async def raw_data_from_doc(doc: RAGDocument) -> tuple[bytes, str]: - """Get raw binary data and mime type from a RAGDocument for file upload.""" - if isinstance(doc.content, URL): - if doc.content.uri.startswith("data:"): - parts = parse_data_url(doc.content.uri) - mime_type = parts["mimetype"] - data = parts["data"] - - if parts["is_base64"]: - file_data = base64.b64decode(data) - else: - file_data = data.encode("utf-8") - - return file_data, mime_type - else: - async with httpx.AsyncClient() as client: - r = await client.get(doc.content.uri) - r.raise_for_status() - mime_type = r.headers.get("content-type", "application/octet-stream") - return r.content, mime_type - else: - if isinstance(doc.content, str): - content_str = doc.content - else: - content_str = interleaved_content_as_str(doc.content) - - if content_str.startswith("data:"): - parts = parse_data_url(content_str) - mime_type = parts["mimetype"] - data = parts["data"] - - if parts["is_base64"]: - file_data = base64.b64decode(data) - else: - file_data = data.encode("utf-8") - - return file_data, mime_type - else: - return content_str.encode("utf-8"), "text/plain" - - -class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, RAGToolRuntime): - def __init__( - self, - config: RagToolRuntimeConfig, - vector_io_api: VectorIO, - inference_api: Inference, - files_api: Files, - ): - self.config = config - self.vector_io_api = vector_io_api - self.inference_api = inference_api - self.files_api = files_api - - async def initialize(self): - pass - - async def shutdown(self): - pass - - async def register_toolgroup(self, toolgroup: ToolGroup) -> None: - pass - - async def unregister_toolgroup(self, toolgroup_id: str) -> None: - return - - async def insert( - self, - documents: list[RAGDocument], - vector_db_id: str, - chunk_size_in_tokens: int = 512, - ) -> None: - if not documents: - return - - for doc in documents: - try: - try: - file_data, mime_type = await raw_data_from_doc(doc) - except Exception as e: - log.error(f"Failed to extract content from document {doc.document_id}: {e}") - continue - - file_extension = mimetypes.guess_extension(mime_type) or ".txt" - filename = doc.metadata.get("filename", f"{doc.document_id}{file_extension}") - - file_obj = io.BytesIO(file_data) - file_obj.name = filename - - upload_file = 
UploadFile(file=file_obj, filename=filename) - - try: - created_file = await self.files_api.openai_upload_file( - file=upload_file, purpose=OpenAIFilePurpose.ASSISTANTS - ) - except Exception as e: - log.error(f"Failed to upload file for document {doc.document_id}: {e}") - continue - - chunking_strategy = VectorStoreChunkingStrategyStatic( - static=VectorStoreChunkingStrategyStaticConfig( - max_chunk_size_tokens=chunk_size_in_tokens, - chunk_overlap_tokens=chunk_size_in_tokens // 4, - ) - ) - - try: - await self.vector_io_api.openai_attach_file_to_vector_store( - vector_store_id=vector_db_id, - file_id=created_file.id, - attributes=doc.metadata, - chunking_strategy=chunking_strategy, - ) - except Exception as e: - log.error( - f"Failed to attach file {created_file.id} to vector store {vector_db_id} for document {doc.document_id}: {e}" - ) - continue - - except Exception as e: - log.error(f"Unexpected error processing document {doc.document_id}: {e}") - continue - - async def query( - self, - content: InterleavedContent, - vector_db_ids: list[str], - query_config: RAGQueryConfig | None = None, - ) -> RAGQueryResult: - if not vector_db_ids: - raise ValueError( - "No vector DBs were provided to the knowledge search tool. Please provide at least one vector DB ID." - ) - - query_config = query_config or RAGQueryConfig() - query = await generate_rag_query( - query_config.query_generator_config, - content, - inference_api=self.inference_api, - ) - tasks = [ - self.vector_io_api.query_chunks( - vector_db_id=vector_db_id, - query=query, - params={ - "mode": query_config.mode, - "max_chunks": query_config.max_chunks, - "score_threshold": 0.0, - "ranker": query_config.ranker, - }, - ) - for vector_db_id in vector_db_ids - ] - results: list[QueryChunksResponse] = await asyncio.gather(*tasks) - - chunks = [] - scores = [] - - for vector_db_id, result in zip(vector_db_ids, results, strict=False): - for chunk, score in zip(result.chunks, result.scores, strict=False): - if not hasattr(chunk, "metadata") or chunk.metadata is None: - chunk.metadata = {} - chunk.metadata["vector_db_id"] = vector_db_id - - chunks.append(chunk) - scores.append(score) - - if not chunks: - return RAGQueryResult(content=None) - - # sort by score - chunks, scores = zip(*sorted(zip(chunks, scores, strict=False), key=lambda x: x[1], reverse=True), strict=False) # type: ignore - chunks = chunks[: query_config.max_chunks] - - tokens = 0 - picked: list[InterleavedContentItem] = [ - TextContentItem( - text=f"knowledge_search tool found {len(chunks)} chunks:\nBEGIN of knowledge_search tool results.\n" - ) - ] - for i, chunk in enumerate(chunks): - metadata = chunk.metadata - tokens += metadata.get("token_count", 0) - tokens += metadata.get("metadata_token_count", 0) - - if tokens > query_config.max_tokens_in_context: - log.error( - f"Using {len(picked)} chunks; reached max tokens in context: {tokens}", - ) - break - - # Add useful keys from chunk_metadata to metadata and remove some from metadata - chunk_metadata_keys_to_include_from_context = [ - "chunk_id", - "document_id", - "source", - ] - metadata_keys_to_exclude_from_context = [ - "token_count", - "metadata_token_count", - "vector_db_id", - ] - metadata_for_context = {} - for k in chunk_metadata_keys_to_include_from_context: - metadata_for_context[k] = getattr(chunk.chunk_metadata, k) - for k in metadata: - if k not in metadata_keys_to_exclude_from_context: - metadata_for_context[k] = metadata[k] - - text_content = query_config.chunk_template.format(index=i + 1, chunk=chunk, 
metadata=metadata_for_context) - picked.append(TextContentItem(text=text_content)) - - picked.append(TextContentItem(text="END of knowledge_search tool results.\n")) - picked.append( - TextContentItem( - text=f'The above results were retrieved to help answer the user\'s query: "{interleaved_content_as_str(content)}". Use them as supporting information only in answering this query.\n', - ) - ) - - return RAGQueryResult( - content=picked, - metadata={ - "document_ids": [c.document_id for c in chunks[: len(picked)]], - "chunks": [c.content for c in chunks[: len(picked)]], - "scores": scores[: len(picked)], - "vector_db_ids": [c.metadata["vector_db_id"] for c in chunks[: len(picked)]], - }, - ) - - async def list_runtime_tools( - self, tool_group_id: str | None = None, mcp_endpoint: URL | None = None - ) -> ListToolDefsResponse: - # Parameters are not listed since these methods are not yet invoked automatically - # by the LLM. The method is only implemented so things like /tools can list without - # encountering fatals. - return ListToolDefsResponse( - data=[ - ToolDef( - name="insert_into_memory", - description="Insert documents into memory", - ), - ToolDef( - name="knowledge_search", - description="Search for information in a database.", - input_schema={ - "type": "object", - "properties": { - "query": { - "type": "string", - "description": "The query to search for. Can be a natural language sentence or keywords.", - } - }, - "required": ["query"], - }, - ), - ] - ) - - async def invoke_tool(self, tool_name: str, kwargs: dict[str, Any]) -> ToolInvocationResult: - vector_db_ids = kwargs.get("vector_db_ids", []) - query_config = kwargs.get("query_config") - if query_config: - query_config = TypeAdapter(RAGQueryConfig).validate_python(query_config) - else: - query_config = RAGQueryConfig() - - query = kwargs["query"] - result = await self.query( - content=query, - vector_db_ids=vector_db_ids, - query_config=query_config, - ) - - return ToolInvocationResult( - content=result.content or [], - metadata={ - **(result.metadata or {}), - "citation_files": getattr(result, "citation_files", None), - }, - ) diff --git a/llama_stack/providers/inline/vector_io/chroma/__init__.py b/llama_stack/providers/inline/vector_io/chroma/__init__.py index 09e869c90..575e5ad88 100644 --- a/llama_stack/providers/inline/vector_io/chroma/__init__.py +++ b/llama_stack/providers/inline/vector_io/chroma/__init__.py @@ -12,15 +12,8 @@ from .config import ChromaVectorIOConfig async def get_provider_impl(config: ChromaVectorIOConfig, deps: dict[Api, Any]): - from llama_stack.providers.remote.vector_io.chroma.chroma import ( - ChromaVectorIOAdapter, - ) + from llama_stack.providers.remote.vector_io.chroma.chroma import ChromaVectorIOAdapter - impl = ChromaVectorIOAdapter( - config, - deps[Api.inference], - deps[Api.models], - deps.get(Api.files), - ) + impl = ChromaVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files)) await impl.initialize() return impl diff --git a/llama_stack/providers/inline/vector_io/chroma/config.py b/llama_stack/providers/inline/vector_io/chroma/config.py index a9566f7ff..1798f10de 100644 --- a/llama_stack/providers/inline/vector_io/chroma/config.py +++ b/llama_stack/providers/inline/vector_io/chroma/config.py @@ -8,14 +8,14 @@ from typing import Any from pydantic import BaseModel, Field -from llama_stack.providers.utils.kvstore.config import KVStoreConfig, SqliteKVStoreConfig +from llama_stack.core.storage.datatypes import KVStoreReference from llama_stack.schema_utils import json_schema_type 
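The provider config hunks that continue below all apply the same migration: the embedded SQLite store config is replaced by a reference into the shared `storage.backends` section, and the vector-io providers additionally rename the field from `kvstore` to `persistence` under a `vector_io::<provider>` namespace. A minimal sketch, using only constructors shown in this patch (the `db_path` value is a placeholder, not from the diff):

```python
# Illustrative only; field names and namespace/table values mirror the hunks.
from llama_stack.core.storage.datatypes import KVStoreReference, SqlStoreReference
from llama_stack.providers.inline.vector_io.chroma.config import ChromaVectorIOConfig

config = ChromaVectorIOConfig(
    db_path="/tmp/chroma.db",  # placeholder path
    persistence=KVStoreReference(backend="kv_default", namespace="vector_io::chroma"),
)

# SQL-backed stores follow the same pattern, keyed by table name instead of
# namespace; sample_run_config() serializes these with model_dump(exclude_none=True).
files_metadata = SqlStoreReference(backend="sql_default", table_name="files_metadata")
```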
@json_schema_type class ChromaVectorIOConfig(BaseModel): db_path: str - kvstore: KVStoreConfig = Field(description="Config for KV store backend") + persistence: KVStoreReference = Field(description="Config for KV store backend") @classmethod def sample_run_config( @@ -23,8 +23,8 @@ class ChromaVectorIOConfig(BaseModel): ) -> dict[str, Any]: return { "db_path": db_path, - "kvstore": SqliteKVStoreConfig.sample_run_config( - __distro_dir__=__distro_dir__, - db_name="chroma_inline_registry.db", - ), + "persistence": KVStoreReference( + backend="kv_default", + namespace="vector_io::chroma", + ).model_dump(exclude_none=True), } diff --git a/llama_stack/providers/inline/vector_io/faiss/__init__.py b/llama_stack/providers/inline/vector_io/faiss/__init__.py index c0f01bc9d..24d1f292a 100644 --- a/llama_stack/providers/inline/vector_io/faiss/__init__.py +++ b/llama_stack/providers/inline/vector_io/faiss/__init__.py @@ -16,11 +16,6 @@ async def get_provider_impl(config: FaissVectorIOConfig, deps: dict[Api, Any]): assert isinstance(config, FaissVectorIOConfig), f"Unexpected config type: {type(config)}" - impl = FaissVectorIOAdapter( - config, - deps[Api.inference], - deps[Api.models], - deps.get(Api.files), - ) + impl = FaissVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files)) await impl.initialize() return impl diff --git a/llama_stack/providers/inline/vector_io/faiss/config.py b/llama_stack/providers/inline/vector_io/faiss/config.py index cbcbb1762..dd7a7aeca 100644 --- a/llama_stack/providers/inline/vector_io/faiss/config.py +++ b/llama_stack/providers/inline/vector_io/faiss/config.py @@ -8,22 +8,19 @@ from typing import Any from pydantic import BaseModel -from llama_stack.providers.utils.kvstore.config import ( - KVStoreConfig, - SqliteKVStoreConfig, -) +from llama_stack.core.storage.datatypes import KVStoreReference from llama_stack.schema_utils import json_schema_type @json_schema_type class FaissVectorIOConfig(BaseModel): - kvstore: KVStoreConfig + persistence: KVStoreReference @classmethod def sample_run_config(cls, __distro_dir__: str, **kwargs: Any) -> dict[str, Any]: return { - "kvstore": SqliteKVStoreConfig.sample_run_config( - __distro_dir__=__distro_dir__, - db_name="faiss_store.db", - ) + "persistence": KVStoreReference( + backend="kv_default", + namespace="vector_io::faiss", + ).model_dump(exclude_none=True) } diff --git a/llama_stack/providers/inline/vector_io/faiss/faiss.py b/llama_stack/providers/inline/vector_io/faiss/faiss.py index df0864db8..5e33d4ca3 100644 --- a/llama_stack/providers/inline/vector_io/faiss/faiss.py +++ b/llama_stack/providers/inline/vector_io/faiss/faiss.py @@ -17,34 +17,21 @@ from numpy.typing import NDArray from llama_stack.apis.common.errors import VectorStoreNotFoundError from llama_stack.apis.files import Files from llama_stack.apis.inference import Inference, InterleavedContent -from llama_stack.apis.models import Models -from llama_stack.apis.vector_dbs import VectorDB -from llama_stack.apis.vector_io import ( - Chunk, - QueryChunksResponse, - VectorIO, -) +from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO +from llama_stack.apis.vector_stores import VectorStore from llama_stack.log import get_logger -from llama_stack.providers.datatypes import ( - HealthResponse, - HealthStatus, - VectorDBsProtocolPrivate, -) +from llama_stack.providers.datatypes import HealthResponse, HealthStatus, VectorStoresProtocolPrivate from llama_stack.providers.utils.kvstore import kvstore_impl from llama_stack.providers.utils.kvstore.api 
import KVStore from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin -from llama_stack.providers.utils.memory.vector_store import ( - ChunkForDeletion, - EmbeddingIndex, - VectorDBWithIndex, -) +from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorStoreWithIndex from .config import FaissVectorIOConfig logger = get_logger(name=__name__, category="vector_io") VERSION = "v3" -VECTOR_DBS_PREFIX = f"vector_dbs:{VERSION}::" +VECTOR_DBS_PREFIX = f"vector_stores:{VERSION}::" FAISS_INDEX_PREFIX = f"faiss_index:{VERSION}::" OPENAI_VECTOR_STORES_PREFIX = f"openai_vector_stores:{VERSION}::" OPENAI_VECTOR_STORES_FILES_PREFIX = f"openai_vector_stores_files:{VERSION}::" @@ -155,12 +142,7 @@ class FaissIndex(EmbeddingIndex): await self._save_index() - async def query_vector( - self, - embedding: NDArray, - k: int, - score_threshold: float, - ) -> QueryChunksResponse: + async def query_vector(self, embedding: NDArray, k: int, score_threshold: float) -> QueryChunksResponse: distances, indices = await asyncio.to_thread(self.index.search, embedding.reshape(1, -1).astype(np.float32), k) chunks = [] scores = [] @@ -175,12 +157,7 @@ class FaissIndex(EmbeddingIndex): return QueryChunksResponse(chunks=chunks, scores=scores) - async def query_keyword( - self, - query_string: str, - k: int, - score_threshold: float, - ) -> QueryChunksResponse: + async def query_keyword(self, query_string: str, k: int, score_threshold: float) -> QueryChunksResponse: raise NotImplementedError( "Keyword search is not supported - underlying DB FAISS does not support this search mode" ) @@ -199,35 +176,28 @@ class FaissIndex(EmbeddingIndex): ) -class FaissVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPrivate): - def __init__( - self, - config: FaissVectorIOConfig, - inference_api: Inference, - models_api: Models, - files_api: Files | None, - ) -> None: +class FaissVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtocolPrivate): + def __init__(self, config: FaissVectorIOConfig, inference_api: Inference, files_api: Files | None) -> None: super().__init__(files_api=files_api, kvstore=None) self.config = config self.inference_api = inference_api - self.models_api = models_api - self.cache: dict[str, VectorDBWithIndex] = {} + self.cache: dict[str, VectorStoreWithIndex] = {} async def initialize(self) -> None: - self.kvstore = await kvstore_impl(self.config.kvstore) + self.kvstore = await kvstore_impl(self.config.persistence) # Load existing banks from kvstore start_key = VECTOR_DBS_PREFIX end_key = f"{VECTOR_DBS_PREFIX}\xff" - stored_vector_dbs = await self.kvstore.values_in_range(start_key, end_key) + stored_vector_stores = await self.kvstore.values_in_range(start_key, end_key) - for vector_db_data in stored_vector_dbs: - vector_db = VectorDB.model_validate_json(vector_db_data) - index = VectorDBWithIndex( - vector_db, - await FaissIndex.create(vector_db.embedding_dimension, self.kvstore, vector_db.identifier), + for vector_store_data in stored_vector_stores: + vector_store = VectorStore.model_validate_json(vector_store_data) + index = VectorStoreWithIndex( + vector_store, + await FaissIndex.create(vector_store.embedding_dimension, self.kvstore, vector_store.identifier), self.inference_api, ) - self.cache[vector_db.identifier] = index + self.cache[vector_store.identifier] = index # Load existing OpenAI vector stores into the in-memory cache await self.initialize_openai_vector_stores() @@ -252,45 +222,33 @@ class 
FaissVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPr except Exception as e: return HealthResponse(status=HealthStatus.ERROR, message=f"Health check failed: {str(e)}") - async def register_vector_db( - self, - vector_db: VectorDB, - ) -> None: + async def register_vector_store(self, vector_store: VectorStore) -> None: assert self.kvstore is not None - key = f"{VECTOR_DBS_PREFIX}{vector_db.identifier}" - await self.kvstore.set( - key=key, - value=vector_db.model_dump_json(), - ) + key = f"{VECTOR_DBS_PREFIX}{vector_store.identifier}" + await self.kvstore.set(key=key, value=vector_store.model_dump_json()) # Store in cache - self.cache[vector_db.identifier] = VectorDBWithIndex( - vector_db=vector_db, - index=await FaissIndex.create(vector_db.embedding_dimension, self.kvstore, vector_db.identifier), + self.cache[vector_store.identifier] = VectorStoreWithIndex( + vector_store=vector_store, + index=await FaissIndex.create(vector_store.embedding_dimension, self.kvstore, vector_store.identifier), inference_api=self.inference_api, ) - async def list_vector_dbs(self) -> list[VectorDB]: - return [i.vector_db for i in self.cache.values()] + async def list_vector_stores(self) -> list[VectorStore]: + return [i.vector_store for i in self.cache.values()] - async def unregister_vector_db(self, vector_db_id: str) -> None: + async def unregister_vector_store(self, vector_store_id: str) -> None: assert self.kvstore is not None - if vector_db_id not in self.cache: - logger.warning(f"Vector DB {vector_db_id} not found") + if vector_store_id not in self.cache: return - await self.cache[vector_db_id].index.delete() - del self.cache[vector_db_id] - await self.kvstore.delete(f"{VECTOR_DBS_PREFIX}{vector_db_id}") + await self.cache[vector_store_id].index.delete() + del self.cache[vector_store_id] + await self.kvstore.delete(f"{VECTOR_DBS_PREFIX}{vector_store_id}") - async def insert_chunks( - self, - vector_db_id: str, - chunks: list[Chunk], - ttl_seconds: int | None = None, - ) -> None: + async def insert_chunks(self, vector_db_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None: index = self.cache.get(vector_db_id) if index is None: raise ValueError(f"Vector DB {vector_db_id} not found. 
found: {self.cache.keys()}") @@ -298,10 +256,7 @@ class FaissVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPr await index.insert_chunks(chunks) async def query_chunks( - self, - vector_db_id: str, - query: InterleavedContent, - params: dict[str, Any] | None = None, + self, vector_db_id: str, query: InterleavedContent, params: dict[str, Any] | None = None ) -> QueryChunksResponse: index = self.cache.get(vector_db_id) if index is None: diff --git a/llama_stack/providers/inline/vector_io/milvus/__init__.py b/llama_stack/providers/inline/vector_io/milvus/__init__.py index 46a006a91..7dc9c6a33 100644 --- a/llama_stack/providers/inline/vector_io/milvus/__init__.py +++ b/llama_stack/providers/inline/vector_io/milvus/__init__.py @@ -14,11 +14,6 @@ async def get_provider_impl(config: MilvusVectorIOConfig, deps: dict[Api, Any]): from llama_stack.providers.remote.vector_io.milvus.milvus import MilvusVectorIOAdapter - impl = MilvusVectorIOAdapter( - config, - deps[Api.inference], - deps[Api.models], - deps.get(Api.files), - ) + impl = MilvusVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files)) await impl.initialize() return impl diff --git a/llama_stack/providers/inline/vector_io/milvus/config.py b/llama_stack/providers/inline/vector_io/milvus/config.py index 8cbd056be..b333b04ea 100644 --- a/llama_stack/providers/inline/vector_io/milvus/config.py +++ b/llama_stack/providers/inline/vector_io/milvus/config.py @@ -8,25 +8,22 @@ from typing import Any from pydantic import BaseModel, Field -from llama_stack.providers.utils.kvstore.config import ( - KVStoreConfig, - SqliteKVStoreConfig, -) +from llama_stack.core.storage.datatypes import KVStoreReference from llama_stack.schema_utils import json_schema_type @json_schema_type class MilvusVectorIOConfig(BaseModel): db_path: str - kvstore: KVStoreConfig = Field(description="Config for KV store backend (SQLite only for now)") + persistence: KVStoreReference = Field(description="Reference to the KV store backend to use") consistency_level: str = Field(description="The consistency level of the Milvus server", default="Strong") @classmethod def sample_run_config(cls, __distro_dir__: str, **kwargs: Any) -> dict[str, Any]: return { "db_path": "${env.MILVUS_DB_PATH:=" + __distro_dir__ + "}/" + "milvus.db", - "kvstore": SqliteKVStoreConfig.sample_run_config( - __distro_dir__=__distro_dir__, - db_name="milvus_registry.db", - ), + "persistence": KVStoreReference( + backend="kv_default", + namespace="vector_io::milvus", + ).model_dump(exclude_none=True), } diff --git a/llama_stack/providers/inline/vector_io/qdrant/__init__.py b/llama_stack/providers/inline/vector_io/qdrant/__init__.py index 2863f667c..bef6d50e6 100644 --- a/llama_stack/providers/inline/vector_io/qdrant/__init__.py +++ b/llama_stack/providers/inline/vector_io/qdrant/__init__.py @@ -15,11 +15,6 @@ async def get_provider_impl(config: QdrantVectorIOConfig, deps: dict[Api, Any]): from llama_stack.providers.remote.vector_io.qdrant.qdrant import QdrantVectorIOAdapter assert isinstance(config, QdrantVectorIOConfig), f"Unexpected config type: {type(config)}" - impl = QdrantVectorIOAdapter( - config, - deps[Api.inference], - deps[Api.models], - deps.get(Api.files), - ) + impl = QdrantVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files)) await impl.initialize() return impl diff --git a/llama_stack/providers/inline/vector_io/qdrant/config.py b/llama_stack/providers/inline/vector_io/qdrant/config.py index e15c27ea1..e7ecde7b7 100644
e15c27ea1..e7ecde7b7 100644 --- a/llama_stack/providers/inline/vector_io/qdrant/config.py +++ b/llama_stack/providers/inline/vector_io/qdrant/config.py @@ -9,23 +9,21 @@ from typing import Any from pydantic import BaseModel -from llama_stack.providers.utils.kvstore.config import ( - KVStoreConfig, - SqliteKVStoreConfig, -) +from llama_stack.core.storage.datatypes import KVStoreReference from llama_stack.schema_utils import json_schema_type @json_schema_type class QdrantVectorIOConfig(BaseModel): path: str - kvstore: KVStoreConfig + persistence: KVStoreReference @classmethod def sample_run_config(cls, __distro_dir__: str) -> dict[str, Any]: return { "path": "${env.QDRANT_PATH:=~/.llama/" + __distro_dir__ + "}/" + "qdrant.db", - "kvstore": SqliteKVStoreConfig.sample_run_config( - __distro_dir__=__distro_dir__, db_name="qdrant_registry.db" - ), + "persistence": KVStoreReference( + backend="kv_default", + namespace="vector_io::qdrant", + ).model_dump(exclude_none=True), } diff --git a/llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py b/llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py index 93921fb23..df96e927c 100644 --- a/llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py +++ b/llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py @@ -15,11 +15,6 @@ async def get_provider_impl(config: SQLiteVectorIOConfig, deps: dict[Api, Any]): from .sqlite_vec import SQLiteVecVectorIOAdapter assert isinstance(config, SQLiteVectorIOConfig), f"Unexpected config type: {type(config)}" - impl = SQLiteVecVectorIOAdapter( - config, - deps[Api.inference], - deps[Api.models], - deps.get(Api.files), - ) + impl = SQLiteVecVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files)) await impl.initialize() return impl diff --git a/llama_stack/providers/inline/vector_io/sqlite_vec/config.py b/llama_stack/providers/inline/vector_io/sqlite_vec/config.py index 525ed4b1f..596f8fc95 100644 --- a/llama_stack/providers/inline/vector_io/sqlite_vec/config.py +++ b/llama_stack/providers/inline/vector_io/sqlite_vec/config.py @@ -8,22 +8,19 @@ from typing import Any from pydantic import BaseModel, Field -from llama_stack.providers.utils.kvstore.config import ( - KVStoreConfig, - SqliteKVStoreConfig, -) +from llama_stack.core.storage.datatypes import KVStoreReference class SQLiteVectorIOConfig(BaseModel): db_path: str = Field(description="Path to the SQLite database file") - kvstore: KVStoreConfig = Field(description="Config for KV store backend (SQLite only for now)") + persistence: KVStoreReference = Field(description="Config for KV store backend (SQLite only for now)") @classmethod def sample_run_config(cls, __distro_dir__: str) -> dict[str, Any]: return { "db_path": "${env.SQLITE_STORE_DIR:=" + __distro_dir__ + "}/" + "sqlite_vec.db", - "kvstore": SqliteKVStoreConfig.sample_run_config( - __distro_dir__=__distro_dir__, - db_name="sqlite_vec_registry.db", - ), + "persistence": KVStoreReference( + backend="kv_default", + namespace="vector_io::sqlite_vec", + ).model_dump(exclude_none=True), } diff --git a/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py b/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py index 8bc3b04cb..37294f173 100644 --- a/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +++ b/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py @@ -17,15 +17,10 @@ from numpy.typing import NDArray from llama_stack.apis.common.errors import VectorStoreNotFoundError from llama_stack.apis.files import Files from 
llama_stack.apis.inference import Inference -from llama_stack.apis.models import Models -from llama_stack.apis.vector_dbs import VectorDB -from llama_stack.apis.vector_io import ( - Chunk, - QueryChunksResponse, - VectorIO, -) +from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO +from llama_stack.apis.vector_stores import VectorStore from llama_stack.log import get_logger -from llama_stack.providers.datatypes import VectorDBsProtocolPrivate +from llama_stack.providers.datatypes import VectorStoresProtocolPrivate from llama_stack.providers.utils.kvstore import kvstore_impl from llama_stack.providers.utils.kvstore.api import KVStore from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin @@ -33,7 +28,7 @@ from llama_stack.providers.utils.memory.vector_store import ( RERANKER_TYPE_RRF, ChunkForDeletion, EmbeddingIndex, - VectorDBWithIndex, + VectorStoreWithIndex, ) from llama_stack.providers.utils.vector_io.vector_utils import WeightedInMemoryAggregator @@ -46,7 +41,7 @@ HYBRID_SEARCH = "hybrid" SEARCH_MODES = {VECTOR_SEARCH, KEYWORD_SEARCH, HYBRID_SEARCH} VERSION = "v3" -VECTOR_DBS_PREFIX = f"vector_dbs:sqlite_vec:{VERSION}::" +VECTOR_DBS_PREFIX = f"vector_stores:sqlite_vec:{VERSION}::" VECTOR_INDEX_PREFIX = f"vector_index:sqlite_vec:{VERSION}::" OPENAI_VECTOR_STORES_PREFIX = f"openai_vector_stores:sqlite_vec:{VERSION}::" OPENAI_VECTOR_STORES_FILES_PREFIX = f"openai_vector_stores_files:sqlite_vec:{VERSION}::" @@ -175,32 +170,18 @@ class SQLiteVecIndex(EmbeddingIndex): # Insert vector embeddings embedding_data = [ - ( - ( - chunk.chunk_id, - serialize_vector(emb.tolist()), - ) - ) + ((chunk.chunk_id, serialize_vector(emb.tolist()))) for chunk, emb in zip(batch_chunks, batch_embeddings, strict=True) ] - cur.executemany( - f"INSERT INTO [{self.vector_table}] (id, embedding) VALUES (?, ?);", - embedding_data, - ) + cur.executemany(f"INSERT INTO [{self.vector_table}] (id, embedding) VALUES (?, ?);", embedding_data) # Insert FTS content fts_data = [(chunk.chunk_id, chunk.content) for chunk in batch_chunks] # DELETE existing entries with same IDs (FTS5 doesn't support ON CONFLICT) - cur.executemany( - f"DELETE FROM [{self.fts_table}] WHERE id = ?;", - [(row[0],) for row in fts_data], - ) + cur.executemany(f"DELETE FROM [{self.fts_table}] WHERE id = ?;", [(row[0],) for row in fts_data]) # INSERT new entries - cur.executemany( - f"INSERT INTO [{self.fts_table}] (id, content) VALUES (?, ?);", - fts_data, - ) + cur.executemany(f"INSERT INTO [{self.fts_table}] (id, content) VALUES (?, ?);", fts_data) connection.commit() @@ -216,12 +197,7 @@ class SQLiteVecIndex(EmbeddingIndex): # Run batch insertion in a background thread await asyncio.to_thread(_execute_all_batch_inserts) - async def query_vector( - self, - embedding: NDArray, - k: int, - score_threshold: float, - ) -> QueryChunksResponse: + async def query_vector(self, embedding: NDArray, k: int, score_threshold: float) -> QueryChunksResponse: """ Performs vector-based search using a virtual table for vector similarity. """ @@ -261,12 +237,7 @@ class SQLiteVecIndex(EmbeddingIndex): scores.append(score) return QueryChunksResponse(chunks=chunks, scores=scores) - async def query_keyword( - self, - query_string: str, - k: int, - score_threshold: float, - ) -> QueryChunksResponse: + async def query_keyword(self, query_string: str, k: int, score_threshold: float) -> QueryChunksResponse: """ Performs keyword-based search using SQLite FTS5 for relevance-ranked full-text search. 
""" @@ -403,41 +374,32 @@ class SQLiteVecIndex(EmbeddingIndex): await asyncio.to_thread(_delete_chunks) -class SQLiteVecVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPrivate): +class SQLiteVecVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtocolPrivate): """ A VectorIO implementation using SQLite + sqlite_vec. - This class handles vector database registration (with metadata stored in a table named `vector_dbs`) - and creates a cache of VectorDBWithIndex instances (each wrapping a SQLiteVecIndex). + This class handles vector database registration (with metadata stored in a table named `vector_stores`) + and creates a cache of VectorStoreWithIndex instances (each wrapping a SQLiteVecIndex). """ - def __init__( - self, - config, - inference_api: Inference, - models_api: Models, - files_api: Files | None, - ) -> None: + def __init__(self, config, inference_api: Inference, files_api: Files | None) -> None: super().__init__(files_api=files_api, kvstore=None) self.config = config self.inference_api = inference_api - self.models_api = models_api - self.cache: dict[str, VectorDBWithIndex] = {} - self.vector_db_store = None + self.cache: dict[str, VectorStoreWithIndex] = {} + self.vector_store_table = None async def initialize(self) -> None: - self.kvstore = await kvstore_impl(self.config.kvstore) + self.kvstore = await kvstore_impl(self.config.persistence) start_key = VECTOR_DBS_PREFIX end_key = f"{VECTOR_DBS_PREFIX}\xff" - stored_vector_dbs = await self.kvstore.values_in_range(start_key, end_key) - for db_json in stored_vector_dbs: - vector_db = VectorDB.model_validate_json(db_json) + stored_vector_stores = await self.kvstore.values_in_range(start_key, end_key) + for db_json in stored_vector_stores: + vector_store = VectorStore.model_validate_json(db_json) index = await SQLiteVecIndex.create( - vector_db.embedding_dimension, - self.config.db_path, - vector_db.identifier, + vector_store.embedding_dimension, self.config.db_path, vector_store.identifier ) - self.cache[vector_db.identifier] = VectorDBWithIndex(vector_db, index, self.inference_api) + self.cache[vector_store.identifier] = VectorStoreWithIndex(vector_store, index, self.inference_api) # Load existing OpenAI vector stores into the in-memory cache await self.initialize_openai_vector_stores() @@ -446,67 +408,64 @@ class SQLiteVecVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtoc # Clean up mixin resources (file batch tasks) await super().shutdown() - async def list_vector_dbs(self) -> list[VectorDB]: - return [v.vector_db for v in self.cache.values()] + async def list_vector_stores(self) -> list[VectorStore]: + return [v.vector_store for v in self.cache.values()] - async def register_vector_db(self, vector_db: VectorDB) -> None: + async def register_vector_store(self, vector_store: VectorStore) -> None: index = await SQLiteVecIndex.create( - vector_db.embedding_dimension, - self.config.db_path, - vector_db.identifier, + vector_store.embedding_dimension, self.config.db_path, vector_store.identifier ) - self.cache[vector_db.identifier] = VectorDBWithIndex(vector_db, index, self.inference_api) + self.cache[vector_store.identifier] = VectorStoreWithIndex(vector_store, index, self.inference_api) - async def _get_and_cache_vector_db_index(self, vector_db_id: str) -> VectorDBWithIndex | None: - if vector_db_id in self.cache: - return self.cache[vector_db_id] + async def _get_and_cache_vector_store_index(self, vector_store_id: str) -> VectorStoreWithIndex | None: + if vector_store_id in 
self.cache: + return self.cache[vector_store_id] - if self.vector_db_store is None: - raise VectorStoreNotFoundError(vector_db_id) + if self.vector_store_table is None: + raise VectorStoreNotFoundError(vector_store_id) - vector_db = self.vector_db_store.get_vector_db(vector_db_id) - if not vector_db: - raise VectorStoreNotFoundError(vector_db_id) + vector_store = self.vector_store_table.get_vector_store(vector_store_id) + if not vector_store: + raise VectorStoreNotFoundError(vector_store_id) - index = VectorDBWithIndex( - vector_db=vector_db, + index = VectorStoreWithIndex( + vector_store=vector_store, index=SQLiteVecIndex( - dimension=vector_db.embedding_dimension, + dimension=vector_store.embedding_dimension, db_path=self.config.db_path, - bank_id=vector_db.identifier, + bank_id=vector_store.identifier, kvstore=self.kvstore, ), inference_api=self.inference_api, ) - self.cache[vector_db_id] = index + self.cache[vector_store_id] = index return index - async def unregister_vector_db(self, vector_db_id: str) -> None: - if vector_db_id not in self.cache: - logger.warning(f"Vector DB {vector_db_id} not found") + async def unregister_vector_store(self, vector_store_id: str) -> None: + if vector_store_id not in self.cache: return - await self.cache[vector_db_id].index.delete() - del self.cache[vector_db_id] + await self.cache[vector_store_id].index.delete() + del self.cache[vector_store_id] async def insert_chunks(self, vector_db_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None: - index = await self._get_and_cache_vector_db_index(vector_db_id) + index = await self._get_and_cache_vector_store_index(vector_db_id) if not index: raise VectorStoreNotFoundError(vector_db_id) - # The VectorDBWithIndex helper is expected to compute embeddings via the inference_api + # The VectorStoreWithIndex helper is expected to compute embeddings via the inference_api # and then call our index's add_chunks. 
await index.insert_chunks(chunks) async def query_chunks( self, vector_db_id: str, query: Any, params: dict[str, Any] | None = None ) -> QueryChunksResponse: - index = await self._get_and_cache_vector_db_index(vector_db_id) + index = await self._get_and_cache_vector_store_index(vector_db_id) if not index: raise VectorStoreNotFoundError(vector_db_id) return await index.query_chunks(query, params) async def delete_chunks(self, store_id: str, chunks_for_deletion: list[ChunkForDeletion]) -> None: """Delete chunks from a sqlite_vec index.""" - index = await self._get_and_cache_vector_db_index(store_id) + index = await self._get_and_cache_vector_store_index(store_id) if not index: raise VectorStoreNotFoundError(store_id) diff --git a/llama_stack/providers/registry/inference.py b/llama_stack/providers/registry/inference.py index 35afb296d..2e52e2d12 100644 --- a/llama_stack/providers/registry/inference.py +++ b/llama_stack/providers/registry/inference.py @@ -42,6 +42,7 @@ def available_providers() -> list[ProviderSpec]: # CrossEncoder depends on torchao.quantization pip_packages=[ "torch torchvision torchao>=0.12.0 --extra-index-url https://download.pytorch.org/whl/cpu", + "numpy tqdm transformers", "sentence-transformers --no-deps", # required by some SentenceTransformers architectures for tensor rearrange/merge ops "einops", diff --git a/llama_stack/providers/registry/tool_runtime.py b/llama_stack/providers/registry/tool_runtime.py index 39dc7fccd..514d9d0a0 100644 --- a/llama_stack/providers/registry/tool_runtime.py +++ b/llama_stack/providers/registry/tool_runtime.py @@ -7,33 +7,13 @@ from llama_stack.providers.datatypes import ( Api, - InlineProviderSpec, ProviderSpec, RemoteProviderSpec, ) -from llama_stack.providers.registry.vector_io import DEFAULT_VECTOR_IO_DEPS def available_providers() -> list[ProviderSpec]: return [ - InlineProviderSpec( - api=Api.tool_runtime, - provider_type="inline::rag-runtime", - pip_packages=DEFAULT_VECTOR_IO_DEPS - + [ - "tqdm", - "numpy", - "scikit-learn", - "scipy", - "nltk", - "sentencepiece", - "transformers", - ], - module="llama_stack.providers.inline.tool_runtime.rag", - config_class="llama_stack.providers.inline.tool_runtime.rag.config.RagToolRuntimeConfig", - api_dependencies=[Api.vector_io, Api.inference, Api.files], - description="RAG (Retrieval-Augmented Generation) tool runtime for document ingestion, chunking, and semantic search.", - ), RemoteProviderSpec( api=Api.tool_runtime, adapter_type="brave-search", diff --git a/llama_stack/providers/registry/vector_io.py b/llama_stack/providers/registry/vector_io.py index ff3b8486f..db81ea35d 100644 --- a/llama_stack/providers/registry/vector_io.py +++ b/llama_stack/providers/registry/vector_io.py @@ -119,7 +119,7 @@ Datasets that can fit in memory, frequent reads | Faiss | Optimized for speed, i #### Empirical Example Consider the histogram below in which 10,000 randomly generated strings were inserted -in batches of 100 into both Faiss and sqlite-vec using `client.tool_runtime.rag_tool.insert()`. +in batches of 100 into both Faiss and sqlite-vec. 
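Since the docstring no longer names a specific insertion API, here is a rough, hypothetical harness for reproducing a write-time comparison of this kind; the `insert_batch` callable is a stand-in for whichever ingestion path (Faiss- or sqlite-vec-backed) is being measured:

```python
# Hypothetical benchmark helper, not part of this patch.
import random
import string
import time
from collections.abc import Callable


def random_strings(n: int, length: int = 64) -> list[str]:
    return ["".join(random.choices(string.ascii_lowercase, k=length)) for _ in range(n)]


def time_batches(
    insert_batch: Callable[[list[str]], None], docs: list[str], batch_size: int = 100
) -> list[float]:
    # Returns per-batch wall-clock write times, e.g. for 10,000 docs in batches of 100.
    timings = []
    for i in range(0, len(docs), batch_size):
        start = time.perf_counter()
        insert_batch(docs[i : i + batch_size])
        timings.append(time.perf_counter() - start)
    return timings
```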
```{image} ../../../../_static/providers/vector_io/write_time_comparison_sqlite-vec-faiss.png :alt: Comparison of SQLite-Vec and Faiss write times diff --git a/llama_stack/providers/remote/datasetio/huggingface/config.py b/llama_stack/providers/remote/datasetio/huggingface/config.py index 38f933728..35297cb58 100644 --- a/llama_stack/providers/remote/datasetio/huggingface/config.py +++ b/llama_stack/providers/remote/datasetio/huggingface/config.py @@ -7,20 +7,17 @@ from typing import Any from pydantic import BaseModel -from llama_stack.providers.utils.kvstore.config import ( - KVStoreConfig, - SqliteKVStoreConfig, -) +from llama_stack.core.storage.datatypes import KVStoreReference class HuggingfaceDatasetIOConfig(BaseModel): - kvstore: KVStoreConfig + kvstore: KVStoreReference @classmethod def sample_run_config(cls, __distro_dir__: str, **kwargs: Any) -> dict[str, Any]: return { - "kvstore": SqliteKVStoreConfig.sample_run_config( - __distro_dir__=__distro_dir__, - db_name="huggingface_datasetio.db", - ) + "kvstore": KVStoreReference( + backend="kv_default", + namespace="datasetio::huggingface", + ).model_dump(exclude_none=True) } diff --git a/llama_stack/providers/remote/datasetio/nvidia/README.md b/llama_stack/providers/remote/datasetio/nvidia/README.md index 74e0895f4..da57d5550 100644 --- a/llama_stack/providers/remote/datasetio/nvidia/README.md +++ b/llama_stack/providers/remote/datasetio/nvidia/README.md @@ -20,7 +20,7 @@ This provider enables dataset management using NVIDIA's NeMo Customizer service. Build the NVIDIA environment: ```bash -llama stack build --distro nvidia --image-type venv +uv run llama stack list-deps nvidia | xargs -L1 uv pip install ``` ### Basic Usage using the LlamaStack Python Client diff --git a/llama_stack/providers/remote/files/s3/config.py b/llama_stack/providers/remote/files/s3/config.py index da20d8668..cd4b1adda 100644 --- a/llama_stack/providers/remote/files/s3/config.py +++ b/llama_stack/providers/remote/files/s3/config.py @@ -8,7 +8,7 @@ from typing import Any from pydantic import BaseModel, Field -from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig, SqlStoreConfig +from llama_stack.core.storage.datatypes import SqlStoreReference class S3FilesImplConfig(BaseModel): @@ -24,7 +24,7 @@ class S3FilesImplConfig(BaseModel): auto_create_bucket: bool = Field( default=False, description="Automatically create the S3 bucket if it doesn't exist" ) - metadata_store: SqlStoreConfig = Field(description="SQL store configuration for file metadata") + metadata_store: SqlStoreReference = Field(description="SQL store configuration for file metadata") @classmethod def sample_run_config(cls, __distro_dir__: str) -> dict[str, Any]: @@ -35,8 +35,8 @@ class S3FilesImplConfig(BaseModel): "aws_secret_access_key": "${env.AWS_SECRET_ACCESS_KEY:=}", "endpoint_url": "${env.S3_ENDPOINT_URL:=}", "auto_create_bucket": "${env.S3_AUTO_CREATE_BUCKET:=false}", - "metadata_store": SqliteSqlStoreConfig.sample_run_config( - __distro_dir__=__distro_dir__, - db_name="s3_files_metadata.db", - ), + "metadata_store": SqlStoreReference( + backend="sql_default", + table_name="s3_files_metadata", + ).model_dump(exclude_none=True), } diff --git a/llama_stack/providers/remote/inference/nvidia/NVIDIA.md b/llama_stack/providers/remote/inference/nvidia/NVIDIA.md index 692b9125b..f1a828413 100644 --- a/llama_stack/providers/remote/inference/nvidia/NVIDIA.md +++ b/llama_stack/providers/remote/inference/nvidia/NVIDIA.md @@ -18,7 +18,7 @@ This provider enables running inference 
using NVIDIA NIM. Build the NVIDIA environment: ```bash -llama stack build --distro nvidia --image-type venv +uv run llama stack list-deps nvidia | xargs -L1 uv pip install ``` ### Basic Usage using the LlamaStack Python Client diff --git a/llama_stack/providers/remote/inference/nvidia/__init__.py b/llama_stack/providers/remote/inference/nvidia/__init__.py index 1869cb748..b4926f33e 100644 --- a/llama_stack/providers/remote/inference/nvidia/__init__.py +++ b/llama_stack/providers/remote/inference/nvidia/__init__.py @@ -10,7 +10,7 @@ from .config import NVIDIAConfig async def get_adapter_impl(config: NVIDIAConfig, _deps) -> Inference: - # import dynamically so `llama stack build` does not fail due to missing dependencies + # import dynamically so `llama stack list-deps` does not fail due to missing dependencies from .nvidia import NVIDIAInferenceAdapter if not isinstance(config, NVIDIAConfig): diff --git a/llama_stack/providers/remote/post_training/nvidia/README.md b/llama_stack/providers/remote/post_training/nvidia/README.md index 9b088a615..789514b1e 100644 --- a/llama_stack/providers/remote/post_training/nvidia/README.md +++ b/llama_stack/providers/remote/post_training/nvidia/README.md @@ -22,7 +22,7 @@ This provider enables fine-tuning of LLMs using NVIDIA's NeMo Customizer service Build the NVIDIA environment: ```bash -llama stack build --distro nvidia --image-type venv +uv run llama stack list-deps nvidia | xargs -L1 uv pip install ``` ### Basic Usage using the LlamaStack Python Client diff --git a/llama_stack/providers/remote/safety/nvidia/README.md b/llama_stack/providers/remote/safety/nvidia/README.md index 784ab464f..e589afe84 100644 --- a/llama_stack/providers/remote/safety/nvidia/README.md +++ b/llama_stack/providers/remote/safety/nvidia/README.md @@ -19,7 +19,7 @@ This provider enables safety checks and guardrails for LLM interactions using NV Build the NVIDIA environment: ```bash -llama stack build --distro nvidia --image-type venv +uv run llama stack list-deps nvidia | xargs -L1 uv pip install ``` ### Basic Usage using the LlamaStack Python Client diff --git a/llama_stack/providers/remote/vector_io/chroma/__init__.py b/llama_stack/providers/remote/vector_io/chroma/__init__.py index a6db48c43..e4b77c68d 100644 --- a/llama_stack/providers/remote/vector_io/chroma/__init__.py +++ b/llama_stack/providers/remote/vector_io/chroma/__init__.py @@ -12,11 +12,6 @@ from .config import ChromaVectorIOConfig async def get_adapter_impl(config: ChromaVectorIOConfig, deps: dict[Api, ProviderSpec]): from .chroma import ChromaVectorIOAdapter - impl = ChromaVectorIOAdapter( - config, - deps[Api.inference], - deps[Api.models], - deps.get(Api.files), - ) + impl = ChromaVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files)) await impl.initialize() return impl diff --git a/llama_stack/providers/remote/vector_io/chroma/chroma.py b/llama_stack/providers/remote/vector_io/chroma/chroma.py index 5792a83c6..2663ad43e 100644 --- a/llama_stack/providers/remote/vector_io/chroma/chroma.py +++ b/llama_stack/providers/remote/vector_io/chroma/chroma.py @@ -12,24 +12,16 @@ import chromadb from numpy.typing import NDArray from llama_stack.apis.files import Files -from llama_stack.apis.inference import InterleavedContent -from llama_stack.apis.vector_dbs import VectorDB -from llama_stack.apis.vector_io import ( - Chunk, - QueryChunksResponse, - VectorIO, -) +from llama_stack.apis.inference import Inference, InterleavedContent +from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO 
+from llama_stack.apis.vector_stores import VectorStore from llama_stack.log import get_logger -from llama_stack.providers.datatypes import Api, VectorDBsProtocolPrivate +from llama_stack.providers.datatypes import VectorStoresProtocolPrivate from llama_stack.providers.inline.vector_io.chroma import ChromaVectorIOConfig as InlineChromaVectorIOConfig from llama_stack.providers.utils.kvstore import kvstore_impl from llama_stack.providers.utils.kvstore.api import KVStore from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin -from llama_stack.providers.utils.memory.vector_store import ( - ChunkForDeletion, - EmbeddingIndex, - VectorDBWithIndex, -) +from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorStoreWithIndex from .config import ChromaVectorIOConfig as RemoteChromaVectorIOConfig @@ -38,7 +30,7 @@ log = get_logger(name=__name__, category="vector_io::chroma") ChromaClientType = chromadb.api.AsyncClientAPI | chromadb.api.ClientAPI VERSION = "v3" -VECTOR_DBS_PREFIX = f"vector_dbs:chroma:{VERSION}::" +VECTOR_DBS_PREFIX = f"vector_stores:chroma:{VERSION}::" VECTOR_INDEX_PREFIX = f"vector_index:chroma:{VERSION}::" OPENAI_VECTOR_STORES_PREFIX = f"openai_vector_stores:chroma:{VERSION}::" OPENAI_VECTOR_STORES_FILES_PREFIX = f"openai_vector_stores_files:chroma:{VERSION}::" @@ -68,19 +60,13 @@ class ChromaIndex(EmbeddingIndex): ids = [f"{c.metadata.get('document_id', '')}:{c.chunk_id}" for c in chunks] await maybe_await( - self.collection.add( - documents=[chunk.model_dump_json() for chunk in chunks], - embeddings=embeddings, - ids=ids, - ) + self.collection.add(documents=[chunk.model_dump_json() for chunk in chunks], embeddings=embeddings, ids=ids) ) async def query_vector(self, embedding: NDArray, k: int, score_threshold: float) -> QueryChunksResponse: results = await maybe_await( self.collection.query( - query_embeddings=[embedding.tolist()], - n_results=k, - include=["documents", "distances"], + query_embeddings=[embedding.tolist()], n_results=k, include=["documents", "distances"] ) ) distances = results["distances"][0] @@ -108,12 +94,7 @@ class ChromaIndex(EmbeddingIndex): async def delete(self): await maybe_await(self.client.delete_collection(self.collection.name)) - async def query_keyword( - self, - query_string: str, - k: int, - score_threshold: float, - ) -> QueryChunksResponse: + async def query_keyword(self, query_string: str, k: int, score_threshold: float) -> QueryChunksResponse: raise NotImplementedError("Keyword search is not supported in Chroma") async def delete_chunks(self, chunks_for_deletion: list[ChunkForDeletion]) -> None: @@ -133,26 +114,24 @@ class ChromaIndex(EmbeddingIndex): raise NotImplementedError("Hybrid search is not supported in Chroma") -class ChromaVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPrivate): +class ChromaVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtocolPrivate): def __init__( self, config: RemoteChromaVectorIOConfig | InlineChromaVectorIOConfig, - inference_api: Api.inference, - models_apis: Api.models, + inference_api: Inference, files_api: Files | None, ) -> None: super().__init__(files_api=files_api, kvstore=None) log.info(f"Initializing ChromaVectorIOAdapter with url: {config}") self.config = config self.inference_api = inference_api - self.models_api = models_apis self.client = None self.cache = {} - self.vector_db_store = None + self.vector_store_table = None async def initialize(self) -> None: - self.kvstore = await 
kvstore_impl(self.config.kvstore) - self.vector_db_store = self.kvstore + self.kvstore = await kvstore_impl(self.config.persistence) + self.vector_store_table = self.kvstore if isinstance(self.config, RemoteChromaVectorIOConfig): log.info(f"Connecting to Chroma server at: {self.config.url}") @@ -172,70 +151,58 @@ class ChromaVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP # Clean up mixin resources (file batch tasks) await super().shutdown() - async def register_vector_db( - self, - vector_db: VectorDB, - ) -> None: + async def register_vector_store(self, vector_store: VectorStore) -> None: collection = await maybe_await( self.client.get_or_create_collection( - name=vector_db.identifier, - metadata={"vector_db": vector_db.model_dump_json()}, + name=vector_store.identifier, metadata={"vector_store": vector_store.model_dump_json()} ) ) - self.cache[vector_db.identifier] = VectorDBWithIndex( - vector_db, ChromaIndex(self.client, collection), self.inference_api + self.cache[vector_store.identifier] = VectorStoreWithIndex( + vector_store, ChromaIndex(self.client, collection), self.inference_api ) - async def unregister_vector_db(self, vector_db_id: str) -> None: - if vector_db_id not in self.cache: - log.warning(f"Vector DB {vector_db_id} not found") + async def unregister_vector_store(self, vector_store_id: str) -> None: + if vector_store_id not in self.cache: + log.warning(f"Vector DB {vector_store_id} not found") return - await self.cache[vector_db_id].index.delete() - del self.cache[vector_db_id] + await self.cache[vector_store_id].index.delete() + del self.cache[vector_store_id] - async def insert_chunks( - self, - vector_db_id: str, - chunks: list[Chunk], - ttl_seconds: int | None = None, - ) -> None: - index = await self._get_and_cache_vector_db_index(vector_db_id) + async def insert_chunks(self, vector_db_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None: + index = await self._get_and_cache_vector_store_index(vector_db_id) if index is None: raise ValueError(f"Vector DB {vector_db_id} not found in Chroma") await index.insert_chunks(chunks) async def query_chunks( - self, - vector_db_id: str, - query: InterleavedContent, - params: dict[str, Any] | None = None, + self, vector_db_id: str, query: InterleavedContent, params: dict[str, Any] | None = None ) -> QueryChunksResponse: - index = await self._get_and_cache_vector_db_index(vector_db_id) + index = await self._get_and_cache_vector_store_index(vector_db_id) if index is None: raise ValueError(f"Vector DB {vector_db_id} not found in Chroma") return await index.query_chunks(query, params) - async def _get_and_cache_vector_db_index(self, vector_db_id: str) -> VectorDBWithIndex: - if vector_db_id in self.cache: - return self.cache[vector_db_id] + async def _get_and_cache_vector_store_index(self, vector_store_id: str) -> VectorStoreWithIndex: + if vector_store_id in self.cache: + return self.cache[vector_store_id] - vector_db = await self.vector_db_store.get_vector_db(vector_db_id) - if not vector_db: - raise ValueError(f"Vector DB {vector_db_id} not found in Llama Stack") - collection = await maybe_await(self.client.get_collection(vector_db_id)) + vector_store = await self.vector_store_table.get_vector_store(vector_store_id) + if not vector_store: + raise ValueError(f"Vector DB {vector_store_id} not found in Llama Stack") + collection = await maybe_await(self.client.get_collection(vector_store_id)) if not collection: - raise ValueError(f"Vector DB {vector_db_id} not found in Chroma") - index = 
VectorDBWithIndex(vector_db, ChromaIndex(self.client, collection), self.inference_api) - self.cache[vector_db_id] = index + raise ValueError(f"Vector DB {vector_store_id} not found in Chroma") + index = VectorStoreWithIndex(vector_store, ChromaIndex(self.client, collection), self.inference_api) + self.cache[vector_store_id] = index return index async def delete_chunks(self, store_id: str, chunks_for_deletion: list[ChunkForDeletion]) -> None: """Delete chunks from a Chroma vector store.""" - index = await self._get_and_cache_vector_db_index(store_id) + index = await self._get_and_cache_vector_store_index(store_id) if not index: raise ValueError(f"Vector DB {store_id} not found") diff --git a/llama_stack/providers/remote/vector_io/chroma/config.py b/llama_stack/providers/remote/vector_io/chroma/config.py index a1193905a..209ba90bb 100644 --- a/llama_stack/providers/remote/vector_io/chroma/config.py +++ b/llama_stack/providers/remote/vector_io/chroma/config.py @@ -8,21 +8,21 @@ from typing import Any from pydantic import BaseModel, Field -from llama_stack.providers.utils.kvstore.config import KVStoreConfig, SqliteKVStoreConfig +from llama_stack.core.storage.datatypes import KVStoreReference from llama_stack.schema_utils import json_schema_type @json_schema_type class ChromaVectorIOConfig(BaseModel): url: str | None - kvstore: KVStoreConfig = Field(description="Config for KV store backend") + persistence: KVStoreReference = Field(description="Config for KV store backend") @classmethod def sample_run_config(cls, __distro_dir__: str, url: str = "${env.CHROMADB_URL}", **kwargs: Any) -> dict[str, Any]: return { "url": url, - "kvstore": SqliteKVStoreConfig.sample_run_config( - __distro_dir__=__distro_dir__, - db_name="chroma_remote_registry.db", - ), + "persistence": KVStoreReference( + backend="kv_default", + namespace="vector_io::chroma_remote", + ).model_dump(exclude_none=True), } diff --git a/llama_stack/providers/remote/vector_io/milvus/__init__.py b/llama_stack/providers/remote/vector_io/milvus/__init__.py index dc5a642d6..526075bb2 100644 --- a/llama_stack/providers/remote/vector_io/milvus/__init__.py +++ b/llama_stack/providers/remote/vector_io/milvus/__init__.py @@ -13,12 +13,6 @@ async def get_adapter_impl(config: MilvusVectorIOConfig, deps: dict[Api, Provide from .milvus import MilvusVectorIOAdapter assert isinstance(config, MilvusVectorIOConfig), f"Unexpected config type: {type(config)}" - - impl = MilvusVectorIOAdapter( - config, - deps[Api.inference], - deps[Api.models], - deps.get(Api.files), - ) + impl = MilvusVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files)) await impl.initialize() return impl diff --git a/llama_stack/providers/remote/vector_io/milvus/config.py b/llama_stack/providers/remote/vector_io/milvus/config.py index 899d3678d..8ff9e1328 100644 --- a/llama_stack/providers/remote/vector_io/milvus/config.py +++ b/llama_stack/providers/remote/vector_io/milvus/config.py @@ -8,7 +8,7 @@ from typing import Any from pydantic import BaseModel, ConfigDict, Field -from llama_stack.providers.utils.kvstore.config import KVStoreConfig, SqliteKVStoreConfig +from llama_stack.core.storage.datatypes import KVStoreReference from llama_stack.schema_utils import json_schema_type @@ -17,7 +17,7 @@ class MilvusVectorIOConfig(BaseModel): uri: str = Field(description="The URI of the Milvus server") token: str | None = Field(description="The token of the Milvus server") consistency_level: str = Field(description="The consistency level of the Milvus server", default="Strong") - 
kvstore: KVStoreConfig = Field(description="Config for KV store backend") + persistence: KVStoreReference = Field(description="Config for KV store backend") # This configuration allows additional fields to be passed through to the underlying Milvus client. # See the [Milvus](https://milvus.io/docs/install-overview.md) documentation for more details about Milvus in general. @@ -28,8 +28,8 @@ class MilvusVectorIOConfig(BaseModel): return { "uri": "${env.MILVUS_ENDPOINT}", "token": "${env.MILVUS_TOKEN}", - "kvstore": SqliteKVStoreConfig.sample_run_config( - __distro_dir__=__distro_dir__, - db_name="milvus_remote_registry.db", - ), + "persistence": KVStoreReference( + backend="kv_default", + namespace="vector_io::milvus_remote", + ).model_dump(exclude_none=True), } diff --git a/llama_stack/providers/remote/vector_io/milvus/milvus.py b/llama_stack/providers/remote/vector_io/milvus/milvus.py index d7147a7f0..cccf13816 100644 --- a/llama_stack/providers/remote/vector_io/milvus/milvus.py +++ b/llama_stack/providers/remote/vector_io/milvus/milvus.py @@ -14,15 +14,10 @@ from pymilvus import AnnSearchRequest, DataType, Function, FunctionType, MilvusC from llama_stack.apis.common.errors import VectorStoreNotFoundError from llama_stack.apis.files import Files from llama_stack.apis.inference import Inference, InterleavedContent -from llama_stack.apis.models import Models -from llama_stack.apis.vector_dbs import VectorDB -from llama_stack.apis.vector_io import ( - Chunk, - QueryChunksResponse, - VectorIO, -) +from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO +from llama_stack.apis.vector_stores import VectorStore from llama_stack.log import get_logger -from llama_stack.providers.datatypes import VectorDBsProtocolPrivate +from llama_stack.providers.datatypes import VectorStoresProtocolPrivate from llama_stack.providers.inline.vector_io.milvus import MilvusVectorIOConfig as InlineMilvusVectorIOConfig from llama_stack.providers.utils.kvstore import kvstore_impl from llama_stack.providers.utils.kvstore.api import KVStore @@ -31,7 +26,7 @@ from llama_stack.providers.utils.memory.vector_store import ( RERANKER_TYPE_WEIGHTED, ChunkForDeletion, EmbeddingIndex, - VectorDBWithIndex, + VectorStoreWithIndex, ) from llama_stack.providers.utils.vector_io.vector_utils import sanitize_collection_name @@ -40,7 +35,7 @@ from .config import MilvusVectorIOConfig as RemoteMilvusVectorIOConfig logger = get_logger(name=__name__, category="vector_io::milvus") VERSION = "v3" -VECTOR_DBS_PREFIX = f"vector_dbs:milvus:{VERSION}::" +VECTOR_DBS_PREFIX = f"vector_stores:milvus:{VERSION}::" VECTOR_INDEX_PREFIX = f"vector_index:milvus:{VERSION}::" OPENAI_VECTOR_STORES_PREFIX = f"openai_vector_stores:milvus:{VERSION}::" OPENAI_VECTOR_STORES_FILES_PREFIX = f"openai_vector_stores_files:milvus:{VERSION}::" @@ -74,46 +69,23 @@ class MilvusIndex(EmbeddingIndex): logger.info(f"Creating new collection {self.collection_name} with nullable sparse field") # Create schema for vector search schema = self.client.create_schema() - schema.add_field( - field_name="chunk_id", - datatype=DataType.VARCHAR, - is_primary=True, - max_length=100, - ) + schema.add_field(field_name="chunk_id", datatype=DataType.VARCHAR, is_primary=True, max_length=100) schema.add_field( field_name="content", datatype=DataType.VARCHAR, max_length=65535, enable_analyzer=True, # Enable text analysis for BM25 ) - schema.add_field( - field_name="vector", - datatype=DataType.FLOAT_VECTOR, - dim=len(embeddings[0]), - ) - schema.add_field( - 
field_name="chunk_content", - datatype=DataType.JSON, - ) + schema.add_field(field_name="vector", datatype=DataType.FLOAT_VECTOR, dim=len(embeddings[0])) + schema.add_field(field_name="chunk_content", datatype=DataType.JSON) # Add sparse vector field for BM25 (required by the function) - schema.add_field( - field_name="sparse", - datatype=DataType.SPARSE_FLOAT_VECTOR, - ) + schema.add_field(field_name="sparse", datatype=DataType.SPARSE_FLOAT_VECTOR) # Create indexes index_params = self.client.prepare_index_params() - index_params.add_index( - field_name="vector", - index_type="FLAT", - metric_type="COSINE", - ) + index_params.add_index(field_name="vector", index_type="FLAT", metric_type="COSINE") # Add index for sparse field (required by BM25 function) - index_params.add_index( - field_name="sparse", - index_type="SPARSE_INVERTED_INDEX", - metric_type="BM25", - ) + index_params.add_index(field_name="sparse", index_type="SPARSE_INVERTED_INDEX", metric_type="BM25") # Add BM25 function for full-text search bm25_function = Function( @@ -144,11 +116,7 @@ class MilvusIndex(EmbeddingIndex): } ) try: - await asyncio.to_thread( - self.client.insert, - self.collection_name, - data=data, - ) + await asyncio.to_thread(self.client.insert, self.collection_name, data=data) except Exception as e: logger.error(f"Error inserting chunks into Milvus collection {self.collection_name}: {e}") raise e @@ -167,12 +135,7 @@ class MilvusIndex(EmbeddingIndex): scores = [res["distance"] for res in search_res[0]] return QueryChunksResponse(chunks=chunks, scores=scores) - async def query_keyword( - self, - query_string: str, - k: int, - score_threshold: float, - ) -> QueryChunksResponse: + async def query_keyword(self, query_string: str, k: int, score_threshold: float) -> QueryChunksResponse: """ Perform BM25-based keyword search using Milvus's built-in full-text search. """ @@ -210,12 +173,7 @@ class MilvusIndex(EmbeddingIndex): # Fallback to simple text search return await self._fallback_keyword_search(query_string, k, score_threshold) - async def _fallback_keyword_search( - self, - query_string: str, - k: int, - score_threshold: float, - ) -> QueryChunksResponse: + async def _fallback_keyword_search(self, query_string: str, k: int, score_threshold: float) -> QueryChunksResponse: """ Fallback to simple text search when BM25 search is not available. 
""" @@ -303,12 +261,11 @@ class MilvusIndex(EmbeddingIndex): raise -class MilvusVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPrivate): +class MilvusVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtocolPrivate): def __init__( self, config: RemoteMilvusVectorIOConfig | InlineMilvusVectorIOConfig, inference_api: Inference, - models_api: Models, files_api: Files | None, ) -> None: super().__init__(files_api=files_api, kvstore=None) @@ -316,29 +273,28 @@ class MilvusVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP self.cache = {} self.client = None self.inference_api = inference_api - self.models_api = models_api - self.vector_db_store = None + self.vector_store_table = None self.metadata_collection_name = "openai_vector_stores_metadata" async def initialize(self) -> None: - self.kvstore = await kvstore_impl(self.config.kvstore) + self.kvstore = await kvstore_impl(self.config.persistence) start_key = VECTOR_DBS_PREFIX end_key = f"{VECTOR_DBS_PREFIX}\xff" - stored_vector_dbs = await self.kvstore.values_in_range(start_key, end_key) + stored_vector_stores = await self.kvstore.values_in_range(start_key, end_key) - for vector_db_data in stored_vector_dbs: - vector_db = VectorDB.model_validate_json(vector_db_data) - index = VectorDBWithIndex( - vector_db, + for vector_store_data in stored_vector_stores: + vector_store = VectorStore.model_validate_json(vector_store_data) + index = VectorStoreWithIndex( + vector_store, index=MilvusIndex( client=self.client, - collection_name=vector_db.identifier, + collection_name=vector_store.identifier, consistency_level=self.config.consistency_level, kvstore=self.kvstore, ), inference_api=self.inference_api, ) - self.cache[vector_db.identifier] = index + self.cache[vector_store.identifier] = index if isinstance(self.config, RemoteMilvusVectorIOConfig): logger.info(f"Connecting to Milvus server at {self.config.uri}") self.client = MilvusClient(**self.config.model_dump(exclude_none=True)) @@ -355,72 +311,61 @@ class MilvusVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP # Clean up mixin resources (file batch tasks) await super().shutdown() - async def register_vector_db( - self, - vector_db: VectorDB, - ) -> None: + async def register_vector_store(self, vector_store: VectorStore) -> None: if isinstance(self.config, RemoteMilvusVectorIOConfig): consistency_level = self.config.consistency_level else: consistency_level = "Strong" - index = VectorDBWithIndex( - vector_db=vector_db, - index=MilvusIndex(self.client, vector_db.identifier, consistency_level=consistency_level), + index = VectorStoreWithIndex( + vector_store=vector_store, + index=MilvusIndex(self.client, vector_store.identifier, consistency_level=consistency_level), inference_api=self.inference_api, ) - self.cache[vector_db.identifier] = index + self.cache[vector_store.identifier] = index - async def _get_and_cache_vector_db_index(self, vector_db_id: str) -> VectorDBWithIndex | None: - if vector_db_id in self.cache: - return self.cache[vector_db_id] + async def _get_and_cache_vector_store_index(self, vector_store_id: str) -> VectorStoreWithIndex | None: + if vector_store_id in self.cache: + return self.cache[vector_store_id] - if self.vector_db_store is None: - raise VectorStoreNotFoundError(vector_db_id) + if self.vector_store_table is None: + raise VectorStoreNotFoundError(vector_store_id) - vector_db = await self.vector_db_store.get_vector_db(vector_db_id) - if not vector_db: - raise VectorStoreNotFoundError(vector_db_id) + 
vector_store = await self.vector_store_table.get_vector_store(vector_store_id) + if not vector_store: + raise VectorStoreNotFoundError(vector_store_id) - index = VectorDBWithIndex( - vector_db=vector_db, - index=MilvusIndex(client=self.client, collection_name=vector_db.identifier, kvstore=self.kvstore), + index = VectorStoreWithIndex( + vector_store=vector_store, + index=MilvusIndex(client=self.client, collection_name=vector_store.identifier, kvstore=self.kvstore), inference_api=self.inference_api, ) - self.cache[vector_db_id] = index + self.cache[vector_store_id] = index return index - async def unregister_vector_db(self, vector_db_id: str) -> None: - if vector_db_id in self.cache: - await self.cache[vector_db_id].index.delete() - del self.cache[vector_db_id] + async def unregister_vector_store(self, vector_store_id: str) -> None: + if vector_store_id in self.cache: + await self.cache[vector_store_id].index.delete() + del self.cache[vector_store_id] - async def insert_chunks( - self, - vector_db_id: str, - chunks: list[Chunk], - ttl_seconds: int | None = None, - ) -> None: - index = await self._get_and_cache_vector_db_index(vector_db_id) + async def insert_chunks(self, vector_db_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None: + index = await self._get_and_cache_vector_store_index(vector_db_id) if not index: raise VectorStoreNotFoundError(vector_db_id) await index.insert_chunks(chunks) async def query_chunks( - self, - vector_db_id: str, - query: InterleavedContent, - params: dict[str, Any] | None = None, + self, vector_db_id: str, query: InterleavedContent, params: dict[str, Any] | None = None ) -> QueryChunksResponse: - index = await self._get_and_cache_vector_db_index(vector_db_id) + index = await self._get_and_cache_vector_store_index(vector_db_id) if not index: raise VectorStoreNotFoundError(vector_db_id) return await index.query_chunks(query, params) async def delete_chunks(self, store_id: str, chunks_for_deletion: list[ChunkForDeletion]) -> None: """Delete a chunk from a milvus vector store.""" - index = await self._get_and_cache_vector_db_index(store_id) + index = await self._get_and_cache_vector_store_index(store_id) if not index: raise VectorStoreNotFoundError(store_id) diff --git a/llama_stack/providers/remote/vector_io/pgvector/__init__.py b/llama_stack/providers/remote/vector_io/pgvector/__init__.py index bb4079ab5..8086b7650 100644 --- a/llama_stack/providers/remote/vector_io/pgvector/__init__.py +++ b/llama_stack/providers/remote/vector_io/pgvector/__init__.py @@ -12,6 +12,6 @@ from .config import PGVectorVectorIOConfig async def get_adapter_impl(config: PGVectorVectorIOConfig, deps: dict[Api, ProviderSpec]): from .pgvector import PGVectorVectorIOAdapter - impl = PGVectorVectorIOAdapter(config, deps[Api.inference], deps[Api.models], deps.get(Api.files, None)) + impl = PGVectorVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files)) await impl.initialize() return impl diff --git a/llama_stack/providers/remote/vector_io/pgvector/config.py b/llama_stack/providers/remote/vector_io/pgvector/config.py index 334cbe5be..d81e524e4 100644 --- a/llama_stack/providers/remote/vector_io/pgvector/config.py +++ b/llama_stack/providers/remote/vector_io/pgvector/config.py @@ -8,10 +8,7 @@ from typing import Any from pydantic import BaseModel, Field -from llama_stack.providers.utils.kvstore.config import ( - KVStoreConfig, - SqliteKVStoreConfig, -) +from llama_stack.core.storage.datatypes import KVStoreReference from llama_stack.schema_utils import 
json_schema_type @@ -22,7 +19,9 @@ class PGVectorVectorIOConfig(BaseModel): db: str | None = Field(default="postgres") user: str | None = Field(default="postgres") password: str | None = Field(default="mysecretpassword") - kvstore: KVStoreConfig | None = Field(description="Config for KV store backend (SQLite only for now)", default=None) + persistence: KVStoreReference | None = Field( + description="Config for KV store backend (SQLite only for now)", default=None + ) @classmethod def sample_run_config( @@ -41,8 +40,8 @@ class PGVectorVectorIOConfig(BaseModel): "db": db, "user": user, "password": password, - "kvstore": SqliteKVStoreConfig.sample_run_config( - __distro_dir__=__distro_dir__, - db_name="pgvector_registry.db", - ), + "persistence": KVStoreReference( + backend="kv_default", + namespace="vector_io::pgvector", + ).model_dump(exclude_none=True), } diff --git a/llama_stack/providers/remote/vector_io/pgvector/pgvector.py b/llama_stack/providers/remote/vector_io/pgvector/pgvector.py index d55c13103..f28bd3cd9 100644 --- a/llama_stack/providers/remote/vector_io/pgvector/pgvector.py +++ b/llama_stack/providers/remote/vector_io/pgvector/pgvector.py @@ -16,26 +16,15 @@ from pydantic import BaseModel, TypeAdapter from llama_stack.apis.common.errors import VectorStoreNotFoundError from llama_stack.apis.files import Files from llama_stack.apis.inference import Inference, InterleavedContent -from llama_stack.apis.models import Models -from llama_stack.apis.vector_dbs import VectorDB -from llama_stack.apis.vector_io import ( - Chunk, - QueryChunksResponse, - VectorIO, -) +from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO +from llama_stack.apis.vector_stores import VectorStore from llama_stack.log import get_logger -from llama_stack.providers.datatypes import VectorDBsProtocolPrivate -from llama_stack.providers.utils.inference.prompt_adapter import ( - interleaved_content_as_str, -) +from llama_stack.providers.datatypes import VectorStoresProtocolPrivate +from llama_stack.providers.utils.inference.prompt_adapter import interleaved_content_as_str from llama_stack.providers.utils.kvstore import kvstore_impl from llama_stack.providers.utils.kvstore.api import KVStore from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin -from llama_stack.providers.utils.memory.vector_store import ( - ChunkForDeletion, - EmbeddingIndex, - VectorDBWithIndex, -) +from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorStoreWithIndex from llama_stack.providers.utils.vector_io.vector_utils import WeightedInMemoryAggregator, sanitize_collection_name from .config import PGVectorVectorIOConfig @@ -43,7 +32,7 @@ from .config import PGVectorVectorIOConfig log = get_logger(name=__name__, category="vector_io::pgvector") VERSION = "v3" -VECTOR_DBS_PREFIX = f"vector_dbs:pgvector:{VERSION}::" +VECTOR_DBS_PREFIX = f"vector_stores:pgvector:{VERSION}::" VECTOR_INDEX_PREFIX = f"vector_index:pgvector:{VERSION}::" OPENAI_VECTOR_STORES_PREFIX = f"openai_vector_stores:pgvector:{VERSION}::" OPENAI_VECTOR_STORES_FILES_PREFIX = f"openai_vector_stores_files:pgvector:{VERSION}::" @@ -90,13 +79,13 @@ class PGVectorIndex(EmbeddingIndex): def __init__( self, - vector_db: VectorDB, + vector_store: VectorStore, dimension: int, conn: psycopg2.extensions.connection, kvstore: KVStore | None = None, distance_metric: str = "COSINE", ): - self.vector_db = vector_db + self.vector_store = vector_store self.dimension = dimension self.conn = conn 
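+ # Shared psycopg2 connection; the per-store table (vs_<sanitized identifier>) is derived and created when the index is initialized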
self.kvstore = kvstore @@ -108,9 +97,9 @@ class PGVectorIndex(EmbeddingIndex): try: with self.conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: # Sanitize the table name by replacing hyphens with underscores - # SQL doesn't allow hyphens in table names, and vector_db.identifier may contain hyphens + # SQL doesn't allow hyphens in table names, and vector_store.identifier may contain hyphens # when created with patterns like "test-vector-db-{uuid4()}" - sanitized_identifier = sanitize_collection_name(self.vector_db.identifier) + sanitized_identifier = sanitize_collection_name(self.vector_store.identifier) self.table_name = f"vs_{sanitized_identifier}" cur.execute( @@ -133,8 +122,8 @@ class PGVectorIndex(EmbeddingIndex): """ ) except Exception as e: - log.exception(f"Error creating PGVectorIndex for vector_db: {self.vector_db.identifier}") - raise RuntimeError(f"Error creating PGVectorIndex for vector_db: {self.vector_db.identifier}") from e + log.exception(f"Error creating PGVectorIndex for vector_store: {self.vector_store.identifier}") + raise RuntimeError(f"Error creating PGVectorIndex for vector_store: {self.vector_store.identifier}") from e async def add_chunks(self, chunks: list[Chunk], embeddings: NDArray): assert len(chunks) == len(embeddings), ( @@ -205,12 +194,7 @@ class PGVectorIndex(EmbeddingIndex): return QueryChunksResponse(chunks=chunks, scores=scores) - async def query_keyword( - self, - query_string: str, - k: int, - score_threshold: float, - ) -> QueryChunksResponse: + async def query_keyword(self, query_string: str, k: int, score_threshold: float) -> QueryChunksResponse: """ Performs keyword-based search using PostgreSQL's full-text search with ts_rank scoring. @@ -317,7 +301,7 @@ class PGVectorIndex(EmbeddingIndex): """Remove a chunk from the PostgreSQL table.""" chunk_ids = [c.chunk_id for c in chunks_for_deletion] with self.conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: cur.execute(f"DELETE FROM {self.table_name} WHERE id = ANY(%s)", (chunk_ids,)) def get_pgvector_search_function(self) -> str: return self.PGVECTOR_DISTANCE_METRIC_TO_SEARCH_FUNCTION[self.distance_metric] @@ -339,26 +323,21 @@ class PGVectorIndex(EmbeddingIndex): ) -class PGVectorVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPrivate): +class PGVectorVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtocolPrivate): def __init__( - self, - config: PGVectorVectorIOConfig, - inference_api: Inference, - models_api: Models, - files_api: Files | None = None, + self, config: PGVectorVectorIOConfig, inference_api: Inference, files_api: Files | None = None ) -> None: super().__init__(files_api=files_api, kvstore=None) self.config = config self.inference_api = inference_api - self.models_api = models_api self.conn = None self.cache = {} - self.vector_db_store = None + self.vector_store_table = None self.metadata_collection_name = "openai_vector_stores_metadata" async def initialize(self) -> None: log.info(f"Initializing PGVector memory adapter with config: {self.config}") - self.kvstore = await kvstore_impl(self.config.kvstore) + self.kvstore = await kvstore_impl(self.config.persistence) await self.initialize_openai_vector_stores() try: @@ -396,71 +375,59 @@ class PGVectorVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtoco # Clean up mixin resources (file batch tasks) await super().shutdown() - async def register_vector_db(self, vector_db: VectorDB) -> None:
+ async def register_vector_store(self, vector_store: VectorStore) -> None: # Persist vector DB metadata in the KV store assert self.kvstore is not None # Upsert model metadata in Postgres - upsert_models(self.conn, [(vector_db.identifier, vector_db)]) + upsert_models(self.conn, [(vector_store.identifier, vector_store)]) # Create and cache the PGVector index table for the vector DB pgvector_index = PGVectorIndex( - vector_db=vector_db, dimension=vector_db.embedding_dimension, conn=self.conn, kvstore=self.kvstore + vector_store=vector_store, dimension=vector_store.embedding_dimension, conn=self.conn, kvstore=self.kvstore ) await pgvector_index.initialize() - index = VectorDBWithIndex( - vector_db, - index=pgvector_index, - inference_api=self.inference_api, - ) - self.cache[vector_db.identifier] = index + index = VectorStoreWithIndex(vector_store, index=pgvector_index, inference_api=self.inference_api) + self.cache[vector_store.identifier] = index - async def unregister_vector_db(self, vector_db_id: str) -> None: + async def unregister_vector_store(self, vector_store_id: str) -> None: # Remove provider index and cache - if vector_db_id in self.cache: - await self.cache[vector_db_id].index.delete() - del self.cache[vector_db_id] + if vector_store_id in self.cache: + await self.cache[vector_store_id].index.delete() + del self.cache[vector_store_id] # Delete vector DB metadata from KV store assert self.kvstore is not None - await self.kvstore.delete(key=f"{VECTOR_DBS_PREFIX}{vector_db_id}") + await self.kvstore.delete(key=f"{VECTOR_DBS_PREFIX}{vector_store_id}") - async def insert_chunks( - self, - vector_db_id: str, - chunks: list[Chunk], - ttl_seconds: int | None = None, - ) -> None: - index = await self._get_and_cache_vector_db_index(vector_db_id) + async def insert_chunks(self, vector_db_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None: + index = await self._get_and_cache_vector_store_index(vector_db_id) await index.insert_chunks(chunks) async def query_chunks( - self, - vector_db_id: str, - query: InterleavedContent, - params: dict[str, Any] | None = None, + self, vector_db_id: str, query: InterleavedContent, params: dict[str, Any] | None = None ) -> QueryChunksResponse: - index = await self._get_and_cache_vector_db_index(vector_db_id) + index = await self._get_and_cache_vector_store_index(vector_db_id) return await index.query_chunks(query, params) - async def _get_and_cache_vector_db_index(self, vector_db_id: str) -> VectorDBWithIndex: - if vector_db_id in self.cache: - return self.cache[vector_db_id] + async def _get_and_cache_vector_store_index(self, vector_store_id: str) -> VectorStoreWithIndex: + if vector_store_id in self.cache: + return self.cache[vector_store_id] - if self.vector_db_store is None: - raise VectorStoreNotFoundError(vector_db_id) + if self.vector_store_table is None: + raise VectorStoreNotFoundError(vector_store_id) - vector_db = await self.vector_db_store.get_vector_db(vector_db_id) - if not vector_db: - raise VectorStoreNotFoundError(vector_db_id) + vector_store = await self.vector_store_table.get_vector_store(vector_store_id) + if not vector_store: + raise VectorStoreNotFoundError(vector_store_id) - index = PGVectorIndex(vector_db, vector_db.embedding_dimension, self.conn) + index = PGVectorIndex(vector_store, vector_store.embedding_dimension, self.conn) await index.initialize() - self.cache[vector_db_id] = VectorDBWithIndex(vector_db, index, self.inference_api) - return self.cache[vector_db_id] + self.cache[vector_store_id] = 
VectorStoreWithIndex(vector_store, index, self.inference_api) + return self.cache[vector_store_id] async def delete_chunks(self, store_id: str, chunks_for_deletion: list[ChunkForDeletion]) -> None: """Delete a chunk from a PostgreSQL vector store.""" - index = await self._get_and_cache_vector_db_index(store_id) + index = await self._get_and_cache_vector_store_index(store_id) if not index: raise VectorStoreNotFoundError(store_id) diff --git a/llama_stack/providers/remote/vector_io/qdrant/__init__.py b/llama_stack/providers/remote/vector_io/qdrant/__init__.py index c4942fbce..e9527f101 100644 --- a/llama_stack/providers/remote/vector_io/qdrant/__init__.py +++ b/llama_stack/providers/remote/vector_io/qdrant/__init__.py @@ -12,11 +12,6 @@ from .config import QdrantVectorIOConfig async def get_adapter_impl(config: QdrantVectorIOConfig, deps: dict[Api, ProviderSpec]): from .qdrant import QdrantVectorIOAdapter - impl = QdrantVectorIOAdapter( - config, - deps[Api.inference], - deps[Api.models], - deps.get(Api.files), - ) + impl = QdrantVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files)) await impl.initialize() return impl diff --git a/llama_stack/providers/remote/vector_io/qdrant/config.py b/llama_stack/providers/remote/vector_io/qdrant/config.py index ff5506236..01fbcc5cb 100644 --- a/llama_stack/providers/remote/vector_io/qdrant/config.py +++ b/llama_stack/providers/remote/vector_io/qdrant/config.py @@ -8,10 +8,7 @@ from typing import Any from pydantic import BaseModel -from llama_stack.providers.utils.kvstore.config import ( - KVStoreConfig, - SqliteKVStoreConfig, -) +from llama_stack.core.storage.datatypes import KVStoreReference from llama_stack.schema_utils import json_schema_type @@ -27,14 +24,14 @@ class QdrantVectorIOConfig(BaseModel): prefix: str | None = None timeout: int | None = None host: str | None = None - kvstore: KVStoreConfig + persistence: KVStoreReference @classmethod def sample_run_config(cls, __distro_dir__: str, **kwargs: Any) -> dict[str, Any]: return { "api_key": "${env.QDRANT_API_KEY:=}", - "kvstore": SqliteKVStoreConfig.sample_run_config( - __distro_dir__=__distro_dir__, - db_name="qdrant_registry.db", - ), + "persistence": KVStoreReference( + backend="kv_default", + namespace="vector_io::qdrant_remote", + ).model_dump(exclude_none=True), } diff --git a/llama_stack/providers/remote/vector_io/qdrant/qdrant.py b/llama_stack/providers/remote/vector_io/qdrant/qdrant.py index 8b90935cd..93d0894a6 100644 --- a/llama_stack/providers/remote/vector_io/qdrant/qdrant.py +++ b/llama_stack/providers/remote/vector_io/qdrant/qdrant.py @@ -16,8 +16,6 @@ from qdrant_client.models import PointStruct from llama_stack.apis.common.errors import VectorStoreNotFoundError from llama_stack.apis.files import Files from llama_stack.apis.inference import Inference, InterleavedContent -from llama_stack.apis.models import Models -from llama_stack.apis.vector_dbs import VectorDB from llama_stack.apis.vector_io import ( Chunk, QueryChunksResponse, @@ -25,16 +23,13 @@ from llama_stack.apis.vector_io import ( VectorStoreChunkingStrategy, VectorStoreFileObject, ) +from llama_stack.apis.vector_stores import VectorStore from llama_stack.log import get_logger -from llama_stack.providers.datatypes import VectorDBsProtocolPrivate +from llama_stack.providers.datatypes import VectorStoresProtocolPrivate from llama_stack.providers.inline.vector_io.qdrant import QdrantVectorIOConfig as InlineQdrantVectorIOConfig from llama_stack.providers.utils.kvstore import kvstore_impl from 
llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin -from llama_stack.providers.utils.memory.vector_store import ( - ChunkForDeletion, - EmbeddingIndex, - VectorDBWithIndex, -) +from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorStoreWithIndex from .config import QdrantVectorIOConfig as RemoteQdrantVectorIOConfig @@ -43,7 +38,7 @@ CHUNK_ID_KEY = "_chunk_id" # KV store prefixes for vector databases VERSION = "v3" -VECTOR_DBS_PREFIX = f"vector_dbs:qdrant:{VERSION}::" +VECTOR_DBS_PREFIX = f"vector_stores:qdrant:{VERSION}::" def convert_id(_id: str) -> str: @@ -99,8 +94,7 @@ class QdrantIndex(EmbeddingIndex): chunk_ids = [convert_id(c.chunk_id) for c in chunks_for_deletion] try: await self.client.delete( - collection_name=self.collection_name, - points_selector=models.PointIdsList(points=chunk_ids), + collection_name=self.collection_name, points_selector=models.PointIdsList(points=chunk_ids) ) except Exception as e: log.error(f"Error deleting chunks from Qdrant collection {self.collection_name}: {e}") @@ -133,12 +127,7 @@ class QdrantIndex(EmbeddingIndex): return QueryChunksResponse(chunks=chunks, scores=scores) - async def query_keyword( - self, - query_string: str, - k: int, - score_threshold: float, - ) -> QueryChunksResponse: + async def query_keyword(self, query_string: str, k: int, score_threshold: float) -> QueryChunksResponse: raise NotImplementedError("Keyword search is not supported in Qdrant") async def query_hybrid( @@ -156,12 +145,11 @@ class QdrantIndex(EmbeddingIndex): await self.client.delete_collection(collection_name=self.collection_name) -class QdrantVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPrivate): +class QdrantVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtocolPrivate): def __init__( self, config: RemoteQdrantVectorIOConfig | InlineQdrantVectorIOConfig, inference_api: Inference, - models_api: Models, files_api: Files | None = None, ) -> None: super().__init__(files_api=files_api, kvstore=None) @@ -169,27 +157,24 @@ class QdrantVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP self.client: AsyncQdrantClient = None self.cache = {} self.inference_api = inference_api - self.models_api = models_api - self.vector_db_store = None + self.vector_store_table = None self._qdrant_lock = asyncio.Lock() async def initialize(self) -> None: - client_config = self.config.model_dump(exclude_none=True, exclude={"kvstore"}) + client_config = self.config.model_dump(exclude_none=True, exclude={"persistence"}) self.client = AsyncQdrantClient(**client_config) - self.kvstore = await kvstore_impl(self.config.kvstore) + self.kvstore = await kvstore_impl(self.config.persistence) start_key = VECTOR_DBS_PREFIX end_key = f"{VECTOR_DBS_PREFIX}\xff" - stored_vector_dbs = await self.kvstore.values_in_range(start_key, end_key) + stored_vector_stores = await self.kvstore.values_in_range(start_key, end_key) - for vector_db_data in stored_vector_dbs: - vector_db = VectorDB.model_validate_json(vector_db_data) - index = VectorDBWithIndex( - vector_db, - QdrantIndex(self.client, vector_db.identifier), - self.inference_api, + for vector_store_data in stored_vector_stores: + vector_store = VectorStore.model_validate_json(vector_store_data) + index = VectorStoreWithIndex( + vector_store, QdrantIndex(self.client, vector_store.identifier), self.inference_api ) - self.cache[vector_db.identifier] = index + self.cache[vector_store.identifier] = index 
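+ # Also reload OpenAI-compatible vector store metadata persisted in the KV store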
self.openai_vector_stores = await self._load_openai_vector_stores() async def shutdown(self) -> None: @@ -197,68 +182,57 @@ class QdrantVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP # Clean up mixin resources (file batch tasks) await super().shutdown() - async def register_vector_db( - self, - vector_db: VectorDB, - ) -> None: + async def register_vector_store(self, vector_store: VectorStore) -> None: assert self.kvstore is not None - key = f"{VECTOR_DBS_PREFIX}{vector_db.identifier}" - await self.kvstore.set(key=key, value=vector_db.model_dump_json()) + key = f"{VECTOR_DBS_PREFIX}{vector_store.identifier}" + await self.kvstore.set(key=key, value=vector_store.model_dump_json()) - index = VectorDBWithIndex( - vector_db=vector_db, - index=QdrantIndex(self.client, vector_db.identifier), + index = VectorStoreWithIndex( + vector_store=vector_store, + index=QdrantIndex(self.client, vector_store.identifier), inference_api=self.inference_api, ) - self.cache[vector_db.identifier] = index + self.cache[vector_store.identifier] = index - async def unregister_vector_db(self, vector_db_id: str) -> None: - if vector_db_id in self.cache: - await self.cache[vector_db_id].index.delete() - del self.cache[vector_db_id] + async def unregister_vector_store(self, vector_store_id: str) -> None: + if vector_store_id in self.cache: + await self.cache[vector_store_id].index.delete() + del self.cache[vector_store_id] assert self.kvstore is not None - await self.kvstore.delete(f"{VECTOR_DBS_PREFIX}{vector_db_id}") + await self.kvstore.delete(f"{VECTOR_DBS_PREFIX}{vector_store_id}") - async def _get_and_cache_vector_db_index(self, vector_db_id: str) -> VectorDBWithIndex | None: - if vector_db_id in self.cache: - return self.cache[vector_db_id] + async def _get_and_cache_vector_store_index(self, vector_store_id: str) -> VectorStoreWithIndex | None: + if vector_store_id in self.cache: + return self.cache[vector_store_id] - if self.vector_db_store is None: - raise ValueError(f"Vector DB not found {vector_db_id}") + if self.vector_store_table is None: + raise ValueError(f"Vector DB not found {vector_store_id}") - vector_db = await self.vector_db_store.get_vector_db(vector_db_id) - if not vector_db: - raise VectorStoreNotFoundError(vector_db_id) + vector_store = await self.vector_store_table.get_vector_store(vector_store_id) + if not vector_store: + raise VectorStoreNotFoundError(vector_store_id) - index = VectorDBWithIndex( - vector_db=vector_db, - index=QdrantIndex(client=self.client, collection_name=vector_db.identifier), + index = VectorStoreWithIndex( + vector_store=vector_store, + index=QdrantIndex(client=self.client, collection_name=vector_store.identifier), inference_api=self.inference_api, ) - self.cache[vector_db_id] = index + self.cache[vector_store_id] = index return index - async def insert_chunks( - self, - vector_db_id: str, - chunks: list[Chunk], - ttl_seconds: int | None = None, - ) -> None: - index = await self._get_and_cache_vector_db_index(vector_db_id) + async def insert_chunks(self, vector_db_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None: + index = await self._get_and_cache_vector_store_index(vector_db_id) if not index: raise VectorStoreNotFoundError(vector_db_id) await index.insert_chunks(chunks) async def query_chunks( - self, - vector_db_id: str, - query: InterleavedContent, - params: dict[str, Any] | None = None, + self, vector_db_id: str, query: InterleavedContent, params: dict[str, Any] | None = None ) -> QueryChunksResponse: - index = await 
self._get_and_cache_vector_db_index(vector_db_id) + index = await self._get_and_cache_vector_store_index(vector_db_id) if not index: raise VectorStoreNotFoundError(vector_db_id) @@ -279,7 +253,7 @@ class QdrantVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP async def delete_chunks(self, store_id: str, chunks_for_deletion: list[ChunkForDeletion]) -> None: """Delete chunks from a Qdrant vector store.""" - index = await self._get_and_cache_vector_db_index(store_id) + index = await self._get_and_cache_vector_store_index(store_id) if not index: raise ValueError(f"Vector DB {store_id} not found") diff --git a/llama_stack/providers/remote/vector_io/weaviate/__init__.py b/llama_stack/providers/remote/vector_io/weaviate/__init__.py index 2040dad96..12e11d013 100644 --- a/llama_stack/providers/remote/vector_io/weaviate/__init__.py +++ b/llama_stack/providers/remote/vector_io/weaviate/__init__.py @@ -12,11 +12,6 @@ from .config import WeaviateVectorIOConfig async def get_adapter_impl(config: WeaviateVectorIOConfig, deps: dict[Api, ProviderSpec]): from .weaviate import WeaviateVectorIOAdapter - impl = WeaviateVectorIOAdapter( - config, - deps[Api.inference], - deps[Api.models], - deps.get(Api.files), - ) + impl = WeaviateVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files)) await impl.initialize() return impl diff --git a/llama_stack/providers/remote/vector_io/weaviate/config.py b/llama_stack/providers/remote/vector_io/weaviate/config.py index b693e294e..66dbf1fed 100644 --- a/llama_stack/providers/remote/vector_io/weaviate/config.py +++ b/llama_stack/providers/remote/vector_io/weaviate/config.py @@ -8,10 +8,7 @@ from typing import Any from pydantic import BaseModel, Field -from llama_stack.providers.utils.kvstore.config import ( - KVStoreConfig, - SqliteKVStoreConfig, -) +from llama_stack.core.storage.datatypes import KVStoreReference from llama_stack.schema_utils import json_schema_type @@ -19,19 +16,17 @@ from llama_stack.schema_utils import json_schema_type class WeaviateVectorIOConfig(BaseModel): weaviate_api_key: str | None = Field(description="The API key for the Weaviate instance", default=None) weaviate_cluster_url: str | None = Field(description="The URL of the Weaviate cluster", default="localhost:8080") - kvstore: KVStoreConfig | None = Field(description="Config for KV store backend (SQLite only for now)", default=None) + persistence: KVStoreReference | None = Field( + description="Config for KV store backend (SQLite only for now)", default=None + ) @classmethod - def sample_run_config( - cls, - __distro_dir__: str, - **kwargs: Any, - ) -> dict[str, Any]: + def sample_run_config(cls, __distro_dir__: str, **kwargs: Any) -> dict[str, Any]: return { "weaviate_api_key": None, "weaviate_cluster_url": "${env.WEAVIATE_CLUSTER_URL:=localhost:8080}", - "kvstore": SqliteKVStoreConfig.sample_run_config( - __distro_dir__=__distro_dir__, - db_name="weaviate_registry.db", - ), + "persistence": KVStoreReference( + backend="kv_default", + namespace="vector_io::weaviate", + ).model_dump(exclude_none=True), } diff --git a/llama_stack/providers/remote/vector_io/weaviate/weaviate.py b/llama_stack/providers/remote/vector_io/weaviate/weaviate.py index d8b11c441..66922aa3f 100644 --- a/llama_stack/providers/remote/vector_io/weaviate/weaviate.py +++ b/llama_stack/providers/remote/vector_io/weaviate/weaviate.py @@ -16,22 +16,19 @@ from llama_stack.apis.common.content_types import InterleavedContent from llama_stack.apis.common.errors import VectorStoreNotFoundError from 
llama_stack.apis.files import Files from llama_stack.apis.inference import Inference -from llama_stack.apis.models import Models -from llama_stack.apis.vector_dbs import VectorDB from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO +from llama_stack.apis.vector_stores import VectorStore from llama_stack.core.request_headers import NeedsRequestProviderData from llama_stack.log import get_logger -from llama_stack.providers.datatypes import VectorDBsProtocolPrivate +from llama_stack.providers.datatypes import VectorStoresProtocolPrivate from llama_stack.providers.utils.kvstore import kvstore_impl from llama_stack.providers.utils.kvstore.api import KVStore -from llama_stack.providers.utils.memory.openai_vector_store_mixin import ( - OpenAIVectorStoreMixin, -) +from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin from llama_stack.providers.utils.memory.vector_store import ( RERANKER_TYPE_RRF, ChunkForDeletion, EmbeddingIndex, - VectorDBWithIndex, + VectorStoreWithIndex, ) from llama_stack.providers.utils.vector_io.vector_utils import sanitize_collection_name @@ -40,7 +37,7 @@ from .config import WeaviateVectorIOConfig log = get_logger(name=__name__, category="vector_io::weaviate") VERSION = "v3" -VECTOR_DBS_PREFIX = f"vector_dbs:weaviate:{VERSION}::" +VECTOR_DBS_PREFIX = f"vector_stores:weaviate:{VERSION}::" VECTOR_INDEX_PREFIX = f"vector_index:weaviate:{VERSION}::" OPENAI_VECTOR_STORES_PREFIX = f"openai_vector_stores:weaviate:{VERSION}::" OPENAI_VECTOR_STORES_FILES_PREFIX = f"openai_vector_stores_files:weaviate:{VERSION}::" @@ -48,12 +45,7 @@ OPENAI_VECTOR_STORES_FILES_CONTENTS_PREFIX = f"openai_vector_stores_files_conten class WeaviateIndex(EmbeddingIndex): - def __init__( - self, - client: weaviate.WeaviateClient, - collection_name: str, - kvstore: KVStore | None = None, - ): + def __init__(self, client: weaviate.WeaviateClient, collection_name: str, kvstore: KVStore | None = None): self.client = client self.collection_name = sanitize_collection_name(collection_name, weaviate_format=True) self.kvstore = kvstore @@ -108,9 +100,7 @@ class WeaviateIndex(EmbeddingIndex): try: results = collection.query.near_vector( - near_vector=embedding.tolist(), - limit=k, - return_metadata=wvc.query.MetadataQuery(distance=True), + near_vector=embedding.tolist(), limit=k, return_metadata=wvc.query.MetadataQuery(distance=True) ) except Exception as e: log.error(f"Weaviate client vector search failed: {e}") @@ -153,12 +143,7 @@ class WeaviateIndex(EmbeddingIndex): collection = self.client.collections.get(sanitized_collection_name) collection.data.delete_many(where=Filter.by_property("id").contains_any(chunk_ids)) - async def query_keyword( - self, - query_string: str, - k: int, - score_threshold: float, - ) -> QueryChunksResponse: + async def query_keyword(self, query_string: str, k: int, score_threshold: float) -> QueryChunksResponse: """ Performs BM25-based keyword search using Weaviate's built-in full-text search. 
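+ By default the BM25 query runs over the collection's text properties; for the collections created here that is the chunk_content field (see register_vector_store below).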
Args: @@ -175,9 +160,7 @@ class WeaviateIndex(EmbeddingIndex): # Perform BM25 keyword search on chunk_content field try: results = collection.query.bm25( - query=query_string, - limit=k, - return_metadata=wvc.query.MetadataQuery(score=True), + query=query_string, limit=k, return_metadata=wvc.query.MetadataQuery(score=True) ) except Exception as e: log.error(f"Weaviate client keyword search failed: {e}") @@ -274,26 +257,14 @@ class WeaviateIndex(EmbeddingIndex): return QueryChunksResponse(chunks=chunks, scores=scores) -class WeaviateVectorIOAdapter( - OpenAIVectorStoreMixin, - VectorIO, - NeedsRequestProviderData, - VectorDBsProtocolPrivate, -): - def __init__( - self, - config: WeaviateVectorIOConfig, - inference_api: Inference, - models_api: Models, - files_api: Files | None, - ) -> None: +class WeaviateVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, NeedsRequestProviderData, VectorStoresProtocolPrivate): + def __init__(self, config: WeaviateVectorIOConfig, inference_api: Inference, files_api: Files | None) -> None: super().__init__(files_api=files_api, kvstore=None) self.config = config self.inference_api = inference_api - self.models_api = models_api self.client_cache = {} self.cache = {} - self.vector_db_store = None + self.vector_store_table = None self.metadata_collection_name = "openai_vector_stores_metadata" def _get_client(self) -> weaviate.WeaviateClient: @@ -301,10 +272,7 @@ class WeaviateVectorIOAdapter( log.info("Using Weaviate locally in container") host, port = self.config.weaviate_cluster_url.split(":") key = "local_test" - client = weaviate.connect_to_local( - host=host, - port=port, - ) + client = weaviate.connect_to_local(host=host, port=port) else: log.info("Using Weaviate remote cluster with URL") key = f"{self.config.weaviate_cluster_url}::{self.config.weaviate_api_key}" @@ -320,8 +288,8 @@ class WeaviateVectorIOAdapter( async def initialize(self) -> None: """Set up KV store and load existing vector DBs and OpenAI vector stores.""" # Initialize KV store for metadata if configured - if self.config.kvstore is not None: - self.kvstore = await kvstore_impl(self.config.kvstore) + if self.config.persistence is not None: + self.kvstore = await kvstore_impl(self.config.persistence) else: self.kvstore = None log.info("No kvstore configured, registry will not persist across restarts") @@ -332,17 +300,11 @@ class WeaviateVectorIOAdapter( end_key = f"{VECTOR_DBS_PREFIX}\xff" stored = await self.kvstore.values_in_range(start_key, end_key) for raw in stored: - vector_db = VectorDB.model_validate_json(raw) + vector_store = VectorStore.model_validate_json(raw) client = self._get_client() - idx = WeaviateIndex( - client=client, - collection_name=vector_db.identifier, - kvstore=self.kvstore, - ) - self.cache[vector_db.identifier] = VectorDBWithIndex( - vector_db=vector_db, - index=idx, - inference_api=self.inference_api, + idx = WeaviateIndex(client=client, collection_name=vector_store.identifier, kvstore=self.kvstore) + self.cache[vector_store.identifier] = VectorStoreWithIndex( + vector_store=vector_store, index=idx, inference_api=self.inference_api ) # Load OpenAI vector stores metadata into cache @@ -354,90 +316,74 @@ class WeaviateVectorIOAdapter( # Clean up mixin resources (file batch tasks) await super().shutdown() - async def register_vector_db( - self, - vector_db: VectorDB, - ) -> None: + async def register_vector_store(self, vector_store: VectorStore) -> None: client = self._get_client() - sanitized_collection_name = sanitize_collection_name(vector_db.identifier, 
weaviate_format=True) + sanitized_collection_name = sanitize_collection_name(vector_store.identifier, weaviate_format=True) # Create collection if it doesn't exist if not client.collections.exists(sanitized_collection_name): client.collections.create( name=sanitized_collection_name, vectorizer_config=wvc.config.Configure.Vectorizer.none(), properties=[ - wvc.config.Property( - name="chunk_content", - data_type=wvc.config.DataType.TEXT, - ), + wvc.config.Property(name="chunk_content", data_type=wvc.config.DataType.TEXT), ], ) - self.cache[vector_db.identifier] = VectorDBWithIndex( - vector_db, - WeaviateIndex(client=client, collection_name=sanitized_collection_name), - self.inference_api, + self.cache[vector_store.identifier] = VectorStoreWithIndex( + vector_store, WeaviateIndex(client=client, collection_name=sanitized_collection_name), self.inference_api ) - async def unregister_vector_db(self, vector_db_id: str) -> None: + async def unregister_vector_store(self, vector_store_id: str) -> None: client = self._get_client() - sanitized_collection_name = sanitize_collection_name(vector_db_id, weaviate_format=True) - if vector_db_id not in self.cache or client.collections.exists(sanitized_collection_name) is False: + sanitized_collection_name = sanitize_collection_name(vector_store_id, weaviate_format=True) + if vector_store_id not in self.cache or client.collections.exists(sanitized_collection_name) is False: return client.collections.delete(sanitized_collection_name) - await self.cache[vector_db_id].index.delete() - del self.cache[vector_db_id] + await self.cache[vector_store_id].index.delete() + del self.cache[vector_store_id] - async def _get_and_cache_vector_db_index(self, vector_db_id: str) -> VectorDBWithIndex | None: - if vector_db_id in self.cache: - return self.cache[vector_db_id] + async def _get_and_cache_vector_store_index(self, vector_store_id: str) -> VectorStoreWithIndex | None: + if vector_store_id in self.cache: + return self.cache[vector_store_id] - if self.vector_db_store is None: - raise VectorStoreNotFoundError(vector_db_id) + if self.vector_store_table is None: + raise VectorStoreNotFoundError(vector_store_id) - vector_db = await self.vector_db_store.get_vector_db(vector_db_id) - if not vector_db: - raise VectorStoreNotFoundError(vector_db_id) + vector_store = await self.vector_store_table.get_vector_store(vector_store_id) + if not vector_store: + raise VectorStoreNotFoundError(vector_store_id) client = self._get_client() - sanitized_collection_name = sanitize_collection_name(vector_db.identifier, weaviate_format=True) + sanitized_collection_name = sanitize_collection_name(vector_store.identifier, weaviate_format=True) if not client.collections.exists(sanitized_collection_name): raise ValueError(f"Collection with name `{sanitized_collection_name}` not found") - index = VectorDBWithIndex( - vector_db=vector_db, - index=WeaviateIndex(client=client, collection_name=vector_db.identifier), + index = VectorStoreWithIndex( + vector_store=vector_store, + index=WeaviateIndex(client=client, collection_name=vector_store.identifier), inference_api=self.inference_api, ) - self.cache[vector_db_id] = index + self.cache[vector_store_id] = index return index - async def insert_chunks( - self, - vector_db_id: str, - chunks: list[Chunk], - ttl_seconds: int | None = None, - ) -> None: - index = await self._get_and_cache_vector_db_index(vector_db_id) + async def insert_chunks(self, vector_db_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None: + index = await 
self._get_and_cache_vector_store_index(vector_db_id) if not index: raise VectorStoreNotFoundError(vector_db_id) await index.insert_chunks(chunks) async def query_chunks( - self, - vector_db_id: str, - query: InterleavedContent, - params: dict[str, Any] | None = None, + self, vector_db_id: str, query: InterleavedContent, params: dict[str, Any] | None = None ) -> QueryChunksResponse: - index = await self._get_and_cache_vector_db_index(vector_db_id) + index = await self._get_and_cache_vector_store_index(vector_db_id) if not index: raise VectorStoreNotFoundError(vector_db_id) return await index.query_chunks(query, params) async def delete_chunks(self, store_id: str, chunks_for_deletion: list[ChunkForDeletion]) -> None: - index = await self._get_and_cache_vector_db_index(store_id) + index = await self._get_and_cache_vector_store_index(store_id) if not index: raise ValueError(f"Vector DB {store_id} not found") diff --git a/llama_stack/providers/utils/inference/inference_store.py b/llama_stack/providers/utils/inference/inference_store.py index 901f77c67..8e20bca6b 100644 --- a/llama_stack/providers/utils/inference/inference_store.py +++ b/llama_stack/providers/utils/inference/inference_store.py @@ -15,12 +15,13 @@ from llama_stack.apis.inference import ( OpenAIMessageParam, Order, ) -from llama_stack.core.datatypes import AccessRule, InferenceStoreConfig +from llama_stack.core.datatypes import AccessRule +from llama_stack.core.storage.datatypes import InferenceStoreReference, StorageBackendType from llama_stack.log import get_logger from ..sqlstore.api import ColumnDefinition, ColumnType from ..sqlstore.authorized_sqlstore import AuthorizedSqlStore -from ..sqlstore.sqlstore import SqlStoreConfig, SqlStoreType, sqlstore_impl +from ..sqlstore.sqlstore import _SQLSTORE_BACKENDS, sqlstore_impl logger = get_logger(name=__name__, category="inference") @@ -28,33 +29,32 @@ logger = get_logger(name=__name__, category="inference") class InferenceStore: def __init__( self, - config: InferenceStoreConfig | SqlStoreConfig, + reference: InferenceStoreReference, policy: list[AccessRule], ): - # Handle backward compatibility - if not isinstance(config, InferenceStoreConfig): - # Legacy: SqlStoreConfig passed directly as config - config = InferenceStoreConfig( - sql_store_config=config, - ) - - self.config = config - self.sql_store_config = config.sql_store_config + self.reference = reference self.sql_store = None self.policy = policy - # Disable write queue for SQLite to avoid concurrency issues - self.enable_write_queue = self.sql_store_config.type != SqlStoreType.sqlite - # Async write queue and worker control self._queue: asyncio.Queue[tuple[OpenAIChatCompletion, list[OpenAIMessageParam]]] | None = None self._worker_tasks: list[asyncio.Task[Any]] = [] - self._max_write_queue_size: int = config.max_write_queue_size - self._num_writers: int = max(1, config.num_writers) + self._max_write_queue_size: int = reference.max_write_queue_size + self._num_writers: int = max(1, reference.num_writers) async def initialize(self): """Create the necessary tables if they don't exist.""" - self.sql_store = AuthorizedSqlStore(sqlstore_impl(self.sql_store_config), self.policy) + base_store = sqlstore_impl(self.reference) + self.sql_store = AuthorizedSqlStore(base_store, self.policy) + + # Disable write queue for SQLite to avoid concurrency issues + backend_name = self.reference.backend + backend_config = _SQLSTORE_BACKENDS.get(backend_name) + if backend_config is None: + raise ValueError( + f"Unregistered SQL backend 
'{backend_name}'. Registered backends: {sorted(_SQLSTORE_BACKENDS)}" + ) + self.enable_write_queue = backend_config.type != StorageBackendType.SQL_SQLITE await self.sql_store.create_table( "chat_completions", { diff --git a/llama_stack/providers/utils/kvstore/config.py b/llama_stack/providers/utils/kvstore/config.py index 7b6a79350..c0582abc4 100644 --- a/llama_stack/providers/utils/kvstore/config.py +++ b/llama_stack/providers/utils/kvstore/config.py @@ -4,143 +4,20 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -import re -from enum import Enum -from typing import Annotated, Literal +from typing import Annotated -from pydantic import BaseModel, Field, field_validator - -from llama_stack.core.utils.config_dirs import RUNTIME_BASE_DIR - - -class KVStoreType(Enum): - redis = "redis" - sqlite = "sqlite" - postgres = "postgres" - mongodb = "mongodb" - - -class CommonConfig(BaseModel): - namespace: str | None = Field( - default=None, - description="All keys will be prefixed with this namespace", - ) - - -class RedisKVStoreConfig(CommonConfig): - type: Literal["redis"] = KVStoreType.redis.value - host: str = "localhost" - port: int = 6379 - - @property - def url(self) -> str: - return f"redis://{self.host}:{self.port}" - - @classmethod - def pip_packages(cls) -> list[str]: - return ["redis"] - - @classmethod - def sample_run_config(cls): - return { - "type": "redis", - "host": "${env.REDIS_HOST:=localhost}", - "port": "${env.REDIS_PORT:=6379}", - } - - -class SqliteKVStoreConfig(CommonConfig): - type: Literal["sqlite"] = KVStoreType.sqlite.value - db_path: str = Field( - default=(RUNTIME_BASE_DIR / "kvstore.db").as_posix(), - description="File path for the sqlite database", - ) - - @classmethod - def pip_packages(cls) -> list[str]: - return ["aiosqlite"] - - @classmethod - def sample_run_config(cls, __distro_dir__: str, db_name: str = "kvstore.db"): - return { - "type": "sqlite", - "db_path": "${env.SQLITE_STORE_DIR:=" + __distro_dir__ + "}/" + db_name, - } - - -class PostgresKVStoreConfig(CommonConfig): - type: Literal["postgres"] = KVStoreType.postgres.value - host: str = "localhost" - port: int = 5432 - db: str = "llamastack" - user: str - password: str | None = None - ssl_mode: str | None = None - ca_cert_path: str | None = None - table_name: str = "llamastack_kvstore" - - @classmethod - def sample_run_config(cls, table_name: str = "llamastack_kvstore", **kwargs): - return { - "type": "postgres", - "host": "${env.POSTGRES_HOST:=localhost}", - "port": "${env.POSTGRES_PORT:=5432}", - "db": "${env.POSTGRES_DB:=llamastack}", - "user": "${env.POSTGRES_USER:=llamastack}", - "password": "${env.POSTGRES_PASSWORD:=llamastack}", - "table_name": "${env.POSTGRES_TABLE_NAME:=" + table_name + "}", - } - - @classmethod - @field_validator("table_name") - def validate_table_name(cls, v: str) -> str: - # PostgreSQL identifiers rules: - # - Must start with a letter or underscore - # - Can contain letters, numbers, and underscores - # - Maximum length is 63 bytes - pattern = r"^[a-zA-Z_][a-zA-Z0-9_]*$" - if not re.match(pattern, v): - raise ValueError( - "Invalid table name. 
Must start with letter or underscore and contain only letters, numbers, and underscores" - ) - if len(v) > 63: - raise ValueError("Table name must be less than 63 characters") - return v - - @classmethod - def pip_packages(cls) -> list[str]: - return ["psycopg2-binary"] - - -class MongoDBKVStoreConfig(CommonConfig): - type: Literal["mongodb"] = KVStoreType.mongodb.value - host: str = "localhost" - port: int = 27017 - db: str = "llamastack" - user: str | None = None - password: str | None = None - collection_name: str = "llamastack_kvstore" - - @classmethod - def pip_packages(cls) -> list[str]: - return ["pymongo"] - - @classmethod - def sample_run_config(cls, collection_name: str = "llamastack_kvstore"): - return { - "type": "mongodb", - "host": "${env.MONGODB_HOST:=localhost}", - "port": "${env.MONGODB_PORT:=5432}", - "db": "${env.MONGODB_DB}", - "user": "${env.MONGODB_USER}", - "password": "${env.MONGODB_PASSWORD}", - "collection_name": "${env.MONGODB_COLLECTION_NAME:=" + collection_name + "}", - } +from pydantic import Field + +from llama_stack.core.storage.datatypes import ( + MongoDBKVStoreConfig, + PostgresKVStoreConfig, + RedisKVStoreConfig, + SqliteKVStoreConfig, + StorageBackendType, +) KVStoreConfig = Annotated[ - RedisKVStoreConfig | SqliteKVStoreConfig | PostgresKVStoreConfig | MongoDBKVStoreConfig, - Field(discriminator="type", default=KVStoreType.sqlite.value), + RedisKVStoreConfig | SqliteKVStoreConfig | PostgresKVStoreConfig | MongoDBKVStoreConfig, Field(discriminator="type") ] @@ -148,13 +25,13 @@ def get_pip_packages(store_config: dict | KVStoreConfig) -> list[str]: """Get pip packages for KV store config, handling both dict and object cases.""" if isinstance(store_config, dict): store_type = store_config.get("type") - if store_type == "sqlite": + if store_type == StorageBackendType.KV_SQLITE.value: return SqliteKVStoreConfig.pip_packages() - elif store_type == "postgres": + elif store_type == StorageBackendType.KV_POSTGRES.value: return PostgresKVStoreConfig.pip_packages() - elif store_type == "redis": + elif store_type == StorageBackendType.KV_REDIS.value: return RedisKVStoreConfig.pip_packages() - elif store_type == "mongodb": + elif store_type == StorageBackendType.KV_MONGODB.value: return MongoDBKVStoreConfig.pip_packages() else: raise ValueError(f"Unknown KV store type: {store_type}") diff --git a/llama_stack/providers/utils/kvstore/kvstore.py b/llama_stack/providers/utils/kvstore/kvstore.py index 426523d8e..eee51e5d9 100644 --- a/llama_stack/providers/utils/kvstore/kvstore.py +++ b/llama_stack/providers/utils/kvstore/kvstore.py @@ -4,9 +4,13 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree.
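# A minimal sketch (illustrative only, not part of this patch) of the
# backend-registry flow this module now implements: named backends are
# registered once at startup, then KVStoreReference objects are resolved
# against them. The backend name "kv_default" and the db_path are assumptions.
#
#     register_kvstore_backends({"kv_default": SqliteKVStoreConfig(db_path="/tmp/kv.db")})
#     store = await kvstore_impl(KVStoreReference(backend="kv_default", namespace="vector_io::weaviate"))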
+ +from __future__ import annotations + +from llama_stack.core.storage.datatypes import KVStoreReference, StorageBackendConfig, StorageBackendType from .api import KVStore -from .config import KVStoreConfig, KVStoreType +from .config import KVStoreConfig def kvstore_dependencies(): @@ -44,20 +52,41 @@ class InmemoryKVStoreImpl(KVStore): del self._store[key] -async def kvstore_impl(config: KVStoreConfig) -> KVStore: - if config.type == KVStoreType.redis.value: +_KVSTORE_BACKENDS: dict[str, KVStoreConfig] = {} + + +def register_kvstore_backends(backends: dict[str, StorageBackendConfig]) -> None: + """Register the set of available KV store backends for reference resolution.""" + global _KVSTORE_BACKENDS + + _KVSTORE_BACKENDS.clear() + for name, cfg in backends.items(): + _KVSTORE_BACKENDS[name] = cfg + + +async def kvstore_impl(reference: KVStoreReference) -> KVStore: + backend_name = reference.backend + + backend_config = _KVSTORE_BACKENDS.get(backend_name) + if backend_config is None: + raise ValueError(f"Unknown KVStore backend '{backend_name}'. Registered backends: {sorted(_KVSTORE_BACKENDS)}") + + config = backend_config.model_copy() + config.namespace = reference.namespace + + if config.type == StorageBackendType.KV_REDIS.value: from .redis import RedisKVStoreImpl impl = RedisKVStoreImpl(config) - elif config.type == KVStoreType.sqlite.value: + elif config.type == StorageBackendType.KV_SQLITE.value: from .sqlite import SqliteKVStoreImpl impl = SqliteKVStoreImpl(config) - elif config.type == KVStoreType.postgres.value: + elif config.type == StorageBackendType.KV_POSTGRES.value: from .postgres import PostgresKVStoreImpl impl = PostgresKVStoreImpl(config) - elif config.type == KVStoreType.mongodb.value: + elif config.type == StorageBackendType.KV_MONGODB.value: from .mongodb import MongoDBKVStoreImpl impl = MongoDBKVStoreImpl(config) diff --git a/llama_stack/providers/utils/memory/openai_vector_store_mixin.py b/llama_stack/providers/utils/memory/openai_vector_store_mixin.py index 0e550434e..6629fb965 100644 --- a/llama_stack/providers/utils/memory/openai_vector_store_mixin.py +++ b/llama_stack/providers/utils/memory/openai_vector_store_mixin.py @@ -17,8 +17,6 @@ from pydantic import TypeAdapter from llama_stack.apis.common.errors import VectorStoreNotFoundError from llama_stack.apis.files import Files, OpenAIFileObject -from llama_stack.apis.models import Model, Models -from llama_stack.apis.vector_dbs import VectorDB from llama_stack.apis.vector_io import ( Chunk, OpenAICreateVectorStoreFileBatchRequestWithExtraBody, @@ -44,6 +42,7 @@ from llama_stack.apis.vector_io import ( VectorStoreSearchResponse, VectorStoreSearchResponsePage, ) +from llama_stack.apis.vector_stores import VectorStore from llama_stack.core.id_generation import generate_object_id from llama_stack.log import get_logger from llama_stack.providers.utils.kvstore.api import KVStore @@ -64,7 +63,7 @@ MAX_CONCURRENT_FILES_PER_BATCH = 3 # Maximum concurrent file processing within FILE_BATCH_CHUNK_SIZE = 10 # Process files in chunks of this size VERSION = "v3" -VECTOR_DBS_PREFIX = f"vector_dbs:{VERSION}::" +VECTOR_DBS_PREFIX = f"vector_stores:{VERSION}::" OPENAI_VECTOR_STORES_PREFIX = f"openai_vector_stores:{VERSION}::" OPENAI_VECTOR_STORES_FILES_PREFIX = f"openai_vector_stores_files:{VERSION}::" OPENAI_VECTOR_STORES_FILES_CONTENTS_PREFIX = f"openai_vector_stores_files_contents:{VERSION}::" @@ -81,13 +80,14 @@ class OpenAIVectorStoreMixin(ABC): # Implementing classes should call super().__init__() in their __init__ method # to 
properly initialize the mixin attributes. def __init__( - self, files_api: Files | None = None, kvstore: KVStore | None = None, models_api: Models | None = None + self, + files_api: Files | None = None, + kvstore: KVStore | None = None, ): self.openai_vector_stores: dict[str, dict[str, Any]] = {} self.openai_file_batches: dict[str, dict[str, Any]] = {} self.files_api = files_api self.kvstore = kvstore - self.models_api = models_api self._last_file_batch_cleanup_time = 0 self._file_batch_tasks: dict[str, asyncio.Task[None]] = {} @@ -321,12 +321,12 @@ class OpenAIVectorStoreMixin(ABC): pass @abstractmethod - async def register_vector_db(self, vector_db: VectorDB) -> None: + async def register_vector_store(self, vector_store: VectorStore) -> None: """Register a vector database (provider-specific implementation).""" pass @abstractmethod - async def unregister_vector_db(self, vector_db_id: str) -> None: + async def unregister_vector_store(self, vector_store_id: str) -> None: """Unregister a vector database (provider-specific implementation).""" pass @@ -358,7 +358,7 @@ class OpenAIVectorStoreMixin(ABC): extra_body = params.model_extra or {} metadata = params.metadata or {} - provider_vector_db_id = extra_body.get("provider_vector_db_id") + provider_vector_store_id = extra_body.get("provider_vector_store_id") # Use embedding info from metadata if available, otherwise from extra_body if metadata.get("embedding_model"): @@ -389,42 +389,29 @@ class OpenAIVectorStoreMixin(ABC): # use provider_id set by router; fallback to provider's own ID when used directly via --stack-config provider_id = extra_body.get("provider_id") or getattr(self, "__provider_id__", None) - # Derive the canonical vector_db_id (allow override, else generate) - vector_db_id = provider_vector_db_id or generate_object_id("vector_store", lambda: f"vs_{uuid.uuid4()}") + # Derive the canonical vector_store_id (allow override, else generate) + vector_store_id = provider_vector_store_id or generate_object_id("vector_store", lambda: f"vs_{uuid.uuid4()}") if embedding_model is None: - result = await self._get_default_embedding_model_and_dimension() - if result is None: - raise ValueError( - "embedding_model is required in extra_body when creating a vector store. " - "No default embedding model could be determined automatically." - ) - embedding_model, embedding_dimension = result - elif embedding_dimension is None: - # Embedding model was provided but dimension wasn't, look it up - embedding_dimension = await self._get_embedding_dimension_for_model(embedding_model) - if embedding_dimension is None: - raise ValueError( - f"Could not determine embedding dimension for model '{embedding_model}'. " - "Please provide embedding_dimension in extra_body or ensure the model metadata contains embedding_dimension." - ) + raise ValueError("embedding_model is required") if embedding_dimension is None: raise ValueError("Embedding dimension is required") - # Register the VectorDB backing this vector store + # Register the VectorStore backing this vector store if provider_id is None: raise ValueError("Provider ID is required but was not provided") - vector_db = VectorDB( - identifier=vector_db_id, + # call to the provider to create any index, etc. 
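+        # (the VectorStore record constructed below only captures identifiers and the
+        # embedding model/dimension; the provider-side setup happens in the
+        # `register_vector_store(...)` call awaited just after construction)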
+ vector_store = VectorStore( + identifier=vector_store_id, embedding_dimension=embedding_dimension, embedding_model=embedding_model, provider_id=provider_id, - provider_resource_id=vector_db_id, - vector_db_name=params.name, + provider_resource_id=vector_store_id, + vector_store_name=params.name, ) - await self.register_vector_db(vector_db) + await self.register_vector_store(vector_store) # Create OpenAI vector store metadata status = "completed" @@ -438,7 +425,7 @@ class OpenAIVectorStoreMixin(ABC): total=0, ) store_info: dict[str, Any] = { - "id": vector_db_id, + "id": vector_store_id, "object": "vector_store", "created_at": created_at, "name": params.name, @@ -455,104 +442,25 @@ class OpenAIVectorStoreMixin(ABC): # Add provider information to metadata if provided if provider_id: metadata["provider_id"] = provider_id - if provider_vector_db_id: - metadata["provider_vector_db_id"] = provider_vector_db_id + if provider_vector_store_id: + metadata["provider_vector_store_id"] = provider_vector_store_id store_info["metadata"] = metadata # Save to persistent storage (provider-specific) - await self._save_openai_vector_store(vector_db_id, store_info) + await self._save_openai_vector_store(vector_store_id, store_info) # Store in memory cache - self.openai_vector_stores[vector_db_id] = store_info + self.openai_vector_stores[vector_store_id] = store_info # Now that our vector store is created, attach any files that were provided file_ids = params.file_ids or [] - tasks = [self.openai_attach_file_to_vector_store(vector_db_id, file_id) for file_id in file_ids] + tasks = [self.openai_attach_file_to_vector_store(vector_store_id, file_id) for file_id in file_ids] await asyncio.gather(*tasks) # Get the updated store info and return it - store_info = self.openai_vector_stores[vector_db_id] + store_info = self.openai_vector_stores[vector_store_id] return VectorStoreObject.model_validate(store_info) - async def _get_embedding_models(self) -> list[Model]: - """Get list of embedding models from the models API.""" - if not self.models_api: - return [] - - models_response = await self.models_api.list_models() - models_list = models_response.data if hasattr(models_response, "data") else models_response - - embedding_models = [] - for model in models_list: - if not isinstance(model, Model): - logger.warning(f"Non-Model object found in models list: {type(model)} - {model}") - continue - if model.model_type == "embedding": - embedding_models.append(model) - - return embedding_models - - async def _get_embedding_dimension_for_model(self, model_id: str) -> int | None: - """Get embedding dimension for a specific model by looking it up in the models API. 
- - Args: - model_id: The identifier of the embedding model (supports both prefixed and non-prefixed) - - Returns: - The embedding dimension for the model, or None if not found - """ - embedding_models = await self._get_embedding_models() - - for model in embedding_models: - # Check for exact match first - if model.identifier == model_id: - embedding_dimension = model.metadata.get("embedding_dimension") - if embedding_dimension is not None: - return int(embedding_dimension) - else: - logger.warning(f"Model {model_id} found but has no embedding_dimension in metadata") - return None - - # Check for prefixed/unprefixed variations - # If model_id is unprefixed, check if it matches the resource_id - if model.provider_resource_id == model_id: - embedding_dimension = model.metadata.get("embedding_dimension") - if embedding_dimension is not None: - return int(embedding_dimension) - - return None - - async def _get_default_embedding_model_and_dimension(self) -> tuple[str, int] | None: - """Get default embedding model from the models API. - - Looks for embedding models marked with default_configured=True in metadata. - Returns None if no default embedding model is found. - Raises ValueError if multiple defaults are found. - """ - embedding_models = await self._get_embedding_models() - - default_models = [] - for model in embedding_models: - if model.metadata.get("default_configured") is True: - default_models.append(model.identifier) - - if len(default_models) > 1: - raise ValueError( - f"Multiple embedding models marked as default_configured=True: {default_models}. " - "Only one embedding model can be marked as default." - ) - - if default_models: - model_id = default_models[0] - embedding_dimension = await self._get_embedding_dimension_for_model(model_id) - if embedding_dimension is None: - raise ValueError(f"Embedding model '{model_id}' has no embedding_dimension in metadata") - logger.info(f"Using default embedding model: {model_id} with dimension {embedding_dimension}") - return model_id, embedding_dimension - - logger.debug("No default embedding models found") - return None - async def openai_list_vector_stores( self, limit: int | None = 20, @@ -660,7 +568,7 @@ class OpenAIVectorStoreMixin(ABC): # Also delete the underlying vector DB try: - await self.unregister_vector_db(vector_store_id) + await self.unregister_vector_store(vector_store_id) except Exception as e: logger.warning(f"Failed to delete underlying vector DB {vector_store_id}: {e}") diff --git a/llama_stack/providers/utils/memory/vector_store.py b/llama_stack/providers/utils/memory/vector_store.py index 0375ecaaa..9e9c9a08a 100644 --- a/llama_stack/providers/utils/memory/vector_store.py +++ b/llama_stack/providers/utils/memory/vector_store.py @@ -12,19 +12,16 @@ from dataclasses import dataclass from typing import Any from urllib.parse import unquote -import httpx import numpy as np from numpy.typing import NDArray from pydantic import BaseModel from llama_stack.apis.common.content_types import ( - URL, InterleavedContent, ) from llama_stack.apis.inference import OpenAIEmbeddingsRequestWithExtraBody -from llama_stack.apis.tools import RAGDocument -from llama_stack.apis.vector_dbs import VectorDB from llama_stack.apis.vector_io import Chunk, ChunkMetadata, QueryChunksResponse +from llama_stack.apis.vector_stores import VectorStore from llama_stack.log import get_logger from llama_stack.models.llama.llama3.tokenizer import Tokenizer from llama_stack.providers.datatypes import Api @@ -129,31 +126,6 @@ def 
content_from_data_and_mime_type(data: bytes | str, mime_type: str | None, en return "" -async def content_from_doc(doc: RAGDocument) -> str: - if isinstance(doc.content, URL): - if doc.content.uri.startswith("data:"): - return content_from_data(doc.content.uri) - async with httpx.AsyncClient() as client: - r = await client.get(doc.content.uri) - if doc.mime_type == "application/pdf": - return parse_pdf(r.content) - return r.text - elif isinstance(doc.content, str): - pattern = re.compile("^(https?://|file://|data:)") - if pattern.match(doc.content): - if doc.content.startswith("data:"): - return content_from_data(doc.content) - async with httpx.AsyncClient() as client: - r = await client.get(doc.content) - if doc.mime_type == "application/pdf": - return parse_pdf(r.content) - return r.text - return doc.content - else: - # will raise ValueError if the content is not List[InterleavedContent] or InterleavedContent - return interleaved_content_as_str(doc.content) - - def make_overlapped_chunks( document_id: str, text: str, window_len: int, overlap_len: int, metadata: dict[str, Any] ) -> list[Chunk]: @@ -187,7 +159,7 @@ def make_overlapped_chunks( updated_timestamp=int(time.time()), chunk_window=chunk_window, chunk_tokenizer=default_tokenizer, - chunk_embedding_model=None, # This will be set in `VectorDBWithIndex.insert_chunks` + chunk_embedding_model=None, # This will be set in `VectorStoreWithIndex.insert_chunks` content_token_count=len(toks), metadata_token_count=len(metadata_tokens), ) @@ -255,8 +227,8 @@ class EmbeddingIndex(ABC): @dataclass -class VectorDBWithIndex: - vector_db: VectorDB +class VectorStoreWithIndex: + vector_store: VectorStore index: EmbeddingIndex inference_api: Api.inference @@ -269,14 +241,14 @@ class VectorDBWithIndex: if c.embedding is None: chunks_to_embed.append(c) if c.chunk_metadata: - c.chunk_metadata.chunk_embedding_model = self.vector_db.embedding_model - c.chunk_metadata.chunk_embedding_dimension = self.vector_db.embedding_dimension + c.chunk_metadata.chunk_embedding_model = self.vector_store.embedding_model + c.chunk_metadata.chunk_embedding_dimension = self.vector_store.embedding_dimension else: - _validate_embedding(c.embedding, i, self.vector_db.embedding_dimension) + _validate_embedding(c.embedding, i, self.vector_store.embedding_dimension) if chunks_to_embed: params = OpenAIEmbeddingsRequestWithExtraBody( - model=self.vector_db.embedding_model, + model=self.vector_store.embedding_model, input=[c.content for c in chunks_to_embed], ) resp = await self.inference_api.openai_embeddings(params) @@ -319,7 +291,7 @@ class VectorDBWithIndex: return await self.index.query_keyword(query_string, k, score_threshold) params = OpenAIEmbeddingsRequestWithExtraBody( - model=self.vector_db.embedding_model, + model=self.vector_store.embedding_model, input=[query_string], ) embeddings_response = await self.inference_api.openai_embeddings(params) diff --git a/llama_stack/providers/utils/responses/responses_store.py b/llama_stack/providers/utils/responses/responses_store.py index 36370b492..d5c243252 100644 --- a/llama_stack/providers/utils/responses/responses_store.py +++ b/llama_stack/providers/utils/responses/responses_store.py @@ -18,13 +18,13 @@ from llama_stack.apis.agents.openai_responses import ( OpenAIResponseObjectWithInput, ) from llama_stack.apis.inference import OpenAIMessageParam -from llama_stack.core.datatypes import AccessRule, ResponsesStoreConfig -from llama_stack.core.utils.config_dirs import RUNTIME_BASE_DIR +from llama_stack.core.datatypes import 
AccessRule +from llama_stack.core.storage.datatypes import ResponsesStoreReference, SqlStoreReference, StorageBackendType from llama_stack.log import get_logger from ..sqlstore.api import ColumnDefinition, ColumnType from ..sqlstore.authorized_sqlstore import AuthorizedSqlStore -from ..sqlstore.sqlstore import SqliteSqlStoreConfig, SqlStoreConfig, SqlStoreType, sqlstore_impl +from ..sqlstore.sqlstore import _SQLSTORE_BACKENDS, sqlstore_impl logger = get_logger(name=__name__, category="openai_responses") @@ -45,39 +45,38 @@ class _OpenAIResponseObjectWithInputAndMessages(OpenAIResponseObjectWithInput): class ResponsesStore: def __init__( self, - config: ResponsesStoreConfig | SqlStoreConfig, + reference: ResponsesStoreReference | SqlStoreReference, policy: list[AccessRule], ): - # Handle backward compatibility - if not isinstance(config, ResponsesStoreConfig): - # Legacy: SqlStoreConfig passed directly as config - config = ResponsesStoreConfig( - sql_store_config=config, - ) + if isinstance(reference, ResponsesStoreReference): + self.reference = reference + else: + self.reference = ResponsesStoreReference(**reference.model_dump()) - self.config = config - self.sql_store_config = config.sql_store_config - if not self.sql_store_config: - self.sql_store_config = SqliteSqlStoreConfig( - db_path=(RUNTIME_BASE_DIR / "sqlstore.db").as_posix(), - ) - self.sql_store = None self.policy = policy - - # Disable write queue for SQLite to avoid concurrency issues - self.enable_write_queue = self.sql_store_config.type != SqlStoreType.sqlite + self.sql_store = None + self.enable_write_queue = True # Async write queue and worker control self._queue: ( asyncio.Queue[tuple[OpenAIResponseObject, list[OpenAIResponseInput], list[OpenAIMessageParam]]] | None ) = None self._worker_tasks: list[asyncio.Task[Any]] = [] - self._max_write_queue_size: int = config.max_write_queue_size - self._num_writers: int = max(1, config.num_writers) + self._max_write_queue_size: int = self.reference.max_write_queue_size + self._num_writers: int = max(1, self.reference.num_writers) async def initialize(self): """Create the necessary tables if they don't exist.""" - self.sql_store = AuthorizedSqlStore(sqlstore_impl(self.sql_store_config), self.policy) + base_store = sqlstore_impl(self.reference) + self.sql_store = AuthorizedSqlStore(base_store, self.policy) + + backend_config = _SQLSTORE_BACKENDS.get(self.reference.backend) + if backend_config is None: + raise ValueError( + f"Unregistered SQL backend '{self.reference.backend}'. 
Registered backends: {sorted(_SQLSTORE_BACKENDS)}" + ) + if backend_config.type == StorageBackendType.SQL_SQLITE: + self.enable_write_queue = False await self.sql_store.create_table( "openai_responses", { diff --git a/llama_stack/providers/utils/sqlstore/authorized_sqlstore.py b/llama_stack/providers/utils/sqlstore/authorized_sqlstore.py index e1da4db6e..3dfc82677 100644 --- a/llama_stack/providers/utils/sqlstore/authorized_sqlstore.py +++ b/llama_stack/providers/utils/sqlstore/authorized_sqlstore.py @@ -12,10 +12,10 @@ from llama_stack.core.access_control.conditions import ProtectedResource from llama_stack.core.access_control.datatypes import AccessRule, Action, Scope from llama_stack.core.datatypes import User from llama_stack.core.request_headers import get_authenticated_user +from llama_stack.core.storage.datatypes import StorageBackendType from llama_stack.log import get_logger from .api import ColumnDefinition, ColumnType, PaginatedResponse, SqlStore -from .sqlstore import SqlStoreType logger = get_logger(name=__name__, category="providers::utils") @@ -82,8 +82,8 @@ class AuthorizedSqlStore: if not hasattr(self.sql_store, "config"): raise ValueError("SqlStore must have a config attribute to be used with AuthorizedSqlStore") - self.database_type = self.sql_store.config.type - if self.database_type not in [SqlStoreType.postgres, SqlStoreType.sqlite]: + self.database_type = self.sql_store.config.type.value + if self.database_type not in [StorageBackendType.SQL_POSTGRES.value, StorageBackendType.SQL_SQLITE.value]: raise ValueError(f"Unsupported database type: {self.database_type}") def _validate_sql_optimized_policy(self) -> None: @@ -220,9 +220,9 @@ class AuthorizedSqlStore: Returns: SQL expression to extract JSON value """ - if self.database_type == SqlStoreType.postgres: + if self.database_type == StorageBackendType.SQL_POSTGRES.value: return f"{column}->'{path}'" - elif self.database_type == SqlStoreType.sqlite: + elif self.database_type == StorageBackendType.SQL_SQLITE.value: return f"JSON_EXTRACT({column}, '$.{path}')" else: raise ValueError(f"Unsupported database type: {self.database_type}") @@ -237,9 +237,9 @@ class AuthorizedSqlStore: Returns: SQL expression to extract JSON value as text """ - if self.database_type == SqlStoreType.postgres: + if self.database_type == StorageBackendType.SQL_POSTGRES.value: return f"{column}->>'{path}'" - elif self.database_type == SqlStoreType.sqlite: + elif self.database_type == StorageBackendType.SQL_SQLITE.value: return f"JSON_EXTRACT({column}, '$.{path}')" else: raise ValueError(f"Unsupported database type: {self.database_type}") @@ -248,10 +248,10 @@ class AuthorizedSqlStore: """Get the SQL conditions for public access.""" # Public records are records that have no owner_principal or access_attributes conditions = ["owner_principal = ''"] - if self.database_type == SqlStoreType.postgres: + if self.database_type == StorageBackendType.SQL_POSTGRES.value: # Postgres stores JSON null as 'null' conditions.append("access_attributes::text = 'null'") - elif self.database_type == SqlStoreType.sqlite: + elif self.database_type == StorageBackendType.SQL_SQLITE.value: conditions.append("access_attributes = 'null'") else: raise ValueError(f"Unsupported database type: {self.database_type}") diff --git a/llama_stack/providers/utils/sqlstore/sqlalchemy_sqlstore.py b/llama_stack/providers/utils/sqlstore/sqlalchemy_sqlstore.py index 23cd6444e..c1ccd73dd 100644 --- a/llama_stack/providers/utils/sqlstore/sqlalchemy_sqlstore.py +++ 
b/llama_stack/providers/utils/sqlstore/sqlalchemy_sqlstore.py @@ -26,10 +26,10 @@ from sqlalchemy.ext.asyncio.engine import AsyncEngine from sqlalchemy.sql.elements import ColumnElement from llama_stack.apis.common.responses import PaginatedResponse +from llama_stack.core.storage.datatypes import SqlAlchemySqlStoreConfig from llama_stack.log import get_logger from .api import ColumnDefinition, ColumnType, SqlStore -from .sqlstore import SqlAlchemySqlStoreConfig logger = get_logger(name=__name__, category="providers::utils") diff --git a/llama_stack/providers/utils/sqlstore/sqlstore.py b/llama_stack/providers/utils/sqlstore/sqlstore.py index fc44402ae..31801c4ca 100644 --- a/llama_stack/providers/utils/sqlstore/sqlstore.py +++ b/llama_stack/providers/utils/sqlstore/sqlstore.py @@ -4,90 +4,28 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from abc import abstractmethod -from enum import StrEnum -from pathlib import Path -from typing import Annotated, Literal +from typing import Annotated, cast -from pydantic import BaseModel, Field +from pydantic import Field -from llama_stack.core.utils.config_dirs import RUNTIME_BASE_DIR +from llama_stack.core.storage.datatypes import ( + PostgresSqlStoreConfig, + SqliteSqlStoreConfig, + SqlStoreReference, + StorageBackendConfig, + StorageBackendType, +) from .api import SqlStore sql_store_pip_packages = ["sqlalchemy[asyncio]", "aiosqlite", "asyncpg"] - -class SqlStoreType(StrEnum): - sqlite = "sqlite" - postgres = "postgres" - - -class SqlAlchemySqlStoreConfig(BaseModel): - @property - @abstractmethod - def engine_str(self) -> str: ... - - # TODO: move this when we have a better way to specify dependencies with internal APIs - @classmethod - def pip_packages(cls) -> list[str]: - return ["sqlalchemy[asyncio]"] - - -class SqliteSqlStoreConfig(SqlAlchemySqlStoreConfig): - type: Literal[SqlStoreType.sqlite] = SqlStoreType.sqlite - db_path: str = Field( - default=(RUNTIME_BASE_DIR / "sqlstore.db").as_posix(), - description="Database path, e.g. 
~/.llama/distributions/ollama/sqlstore.db", - ) - - @property - def engine_str(self) -> str: - return "sqlite+aiosqlite:///" + Path(self.db_path).expanduser().as_posix() - - @classmethod - def sample_run_config(cls, __distro_dir__: str, db_name: str = "sqlstore.db"): - return { - "type": "sqlite", - "db_path": "${env.SQLITE_STORE_DIR:=" + __distro_dir__ + "}/" + db_name, - } - - @classmethod - def pip_packages(cls) -> list[str]: - return super().pip_packages() + ["aiosqlite"] - - -class PostgresSqlStoreConfig(SqlAlchemySqlStoreConfig): - type: Literal[SqlStoreType.postgres] = SqlStoreType.postgres - host: str = "localhost" - port: int = 5432 - db: str = "llamastack" - user: str - password: str | None = None - - @property - def engine_str(self) -> str: - return f"postgresql+asyncpg://{self.user}:{self.password}@{self.host}:{self.port}/{self.db}" - - @classmethod - def pip_packages(cls) -> list[str]: - return super().pip_packages() + ["asyncpg"] - - @classmethod - def sample_run_config(cls, **kwargs): - return { - "type": "postgres", - "host": "${env.POSTGRES_HOST:=localhost}", - "port": "${env.POSTGRES_PORT:=5432}", - "db": "${env.POSTGRES_DB:=llamastack}", - "user": "${env.POSTGRES_USER:=llamastack}", - "password": "${env.POSTGRES_PASSWORD:=llamastack}", - } +_SQLSTORE_BACKENDS: dict[str, StorageBackendConfig] = {} SqlStoreConfig = Annotated[ SqliteSqlStoreConfig | PostgresSqlStoreConfig, - Field(discriminator="type", default=SqlStoreType.sqlite.value), + Field(discriminator="type"), ] @@ -95,9 +33,9 @@ def get_pip_packages(store_config: dict | SqlStoreConfig) -> list[str]: """Get pip packages for SQL store config, handling both dict and object cases.""" if isinstance(store_config, dict): store_type = store_config.get("type") - if store_type == "sqlite": + if store_type == StorageBackendType.SQL_SQLITE.value: return SqliteSqlStoreConfig.pip_packages() - elif store_type == "postgres": + elif store_type == StorageBackendType.SQL_POSTGRES.value: return PostgresSqlStoreConfig.pip_packages() else: raise ValueError(f"Unknown SQL store type: {store_type}") @@ -105,12 +43,28 @@ def get_pip_packages(store_config: dict | SqlStoreConfig) -> list[str]: return store_config.pip_packages() -def sqlstore_impl(config: SqlStoreConfig) -> SqlStore: - if config.type in [SqlStoreType.sqlite, SqlStoreType.postgres]: +def sqlstore_impl(reference: SqlStoreReference) -> SqlStore: + backend_name = reference.backend + + backend_config = _SQLSTORE_BACKENDS.get(backend_name) + if backend_config is None: + raise ValueError( + f"Unknown SQL store backend '{backend_name}'. 
Registered backends: {sorted(_SQLSTORE_BACKENDS)}" + ) + + if isinstance(backend_config, SqliteSqlStoreConfig | PostgresSqlStoreConfig): from .sqlalchemy_sqlstore import SqlAlchemySqlStoreImpl - impl = SqlAlchemySqlStoreImpl(config) + config = cast(SqliteSqlStoreConfig | PostgresSqlStoreConfig, backend_config).model_copy() + return SqlAlchemySqlStoreImpl(config) else: - raise ValueError(f"Unknown sqlstore type {config.type}") + raise ValueError(f"Unknown sqlstore type {backend_config.type}") - return impl + +def register_sqlstore_backends(backends: dict[str, StorageBackendConfig]) -> None: + """Register the set of available SQL store backends for reference resolution.""" + global _SQLSTORE_BACKENDS + + _SQLSTORE_BACKENDS.clear() + for name, cfg in backends.items(): + _SQLSTORE_BACKENDS[name] = cfg diff --git a/llama_stack/ui/package-lock.json b/llama_stack/ui/package-lock.json index c138de535..9b22dd8d5 100644 --- a/llama_stack/ui/package-lock.json +++ b/llama_stack/ui/package-lock.json @@ -2072,9 +2072,9 @@ } }, "node_modules/@jridgewell/sourcemap-codec": { - "version": "1.5.0", - "resolved": "https://registry.npmjs.org/@jridgewell/sourcemap-codec/-/sourcemap-codec-1.5.0.tgz", - "integrity": "sha512-gv3ZRaISU3fjPAgNsriBRqGWQL6quFx04YMPW/zD8XMLsU32mhCCbfbO6KZFLjvYpCZ8zyDEgqsgf+PwPaM7GQ==", + "version": "1.5.5", + "resolved": "https://registry.npmjs.org/@jridgewell/sourcemap-codec/-/sourcemap-codec-1.5.5.tgz", + "integrity": "sha512-cYQ9310grqxueWbl+WuIUIaiUaDcj7WOq5fVhEljNVgRfOUhY9fy2zTvfoqWsnebh8Sl70VScFbICvJnLKB0Og==", "dev": true, "license": "MIT" }, @@ -3199,61 +3199,54 @@ } }, "node_modules/@tailwindcss/node": { - "version": "4.1.6", - "resolved": "https://registry.npmjs.org/@tailwindcss/node/-/node-4.1.6.tgz", - "integrity": "sha512-ed6zQbgmKsjsVvodAS1q1Ld2BolEuxJOSyyNc+vhkjdmfNUDCmQnlXBfQkHrlzNmslxHsQU/bFmzcEbv4xXsLg==", + "version": "4.1.14", + "resolved": "https://registry.npmjs.org/@tailwindcss/node/-/node-4.1.14.tgz", + "integrity": "sha512-hpz+8vFk3Ic2xssIA3e01R6jkmsAhvkQdXlEbRTk6S10xDAtiQiM3FyvZVGsucefq764euO/b8WUW9ysLdThHw==", "dev": true, "license": "MIT", "dependencies": { - "@ampproject/remapping": "^2.3.0", - "enhanced-resolve": "^5.18.1", - "jiti": "^2.4.2", - "lightningcss": "1.29.2", - "magic-string": "^0.30.17", + "@jridgewell/remapping": "^2.3.4", + "enhanced-resolve": "^5.18.3", + "jiti": "^2.6.0", + "lightningcss": "1.30.1", + "magic-string": "^0.30.19", "source-map-js": "^1.2.1", - "tailwindcss": "4.1.6" + "tailwindcss": "4.1.14" } }, - "node_modules/@tailwindcss/node/node_modules/tailwindcss": { - "version": "4.1.6", - "resolved": "https://registry.npmjs.org/tailwindcss/-/tailwindcss-4.1.6.tgz", - "integrity": "sha512-j0cGLTreM6u4OWzBeLBpycK0WIh8w7kSwcUsQZoGLHZ7xDTdM69lN64AgoIEEwFi0tnhs4wSykUa5YWxAzgFYg==", - "dev": true, - "license": "MIT" - }, "node_modules/@tailwindcss/oxide": { - "version": "4.1.6", - "resolved": "https://registry.npmjs.org/@tailwindcss/oxide/-/oxide-4.1.6.tgz", - "integrity": "sha512-0bpEBQiGx+227fW4G0fLQ8vuvyy5rsB1YIYNapTq3aRsJ9taF3f5cCaovDjN5pUGKKzcpMrZst/mhNaKAPOHOA==", + "version": "4.1.14", + "resolved": "https://registry.npmjs.org/@tailwindcss/oxide/-/oxide-4.1.14.tgz", + "integrity": "sha512-23yx+VUbBwCg2x5XWdB8+1lkPajzLmALEfMb51zZUBYaYVPDQvBSD/WYDqiVyBIo2BZFa3yw1Rpy3G2Jp+K0dw==", "dev": true, "hasInstallScript": true, "license": "MIT", "dependencies": { "detect-libc": "^2.0.4", - "tar": "^7.4.3" + "tar": "^7.5.1" }, "engines": { "node": ">= 10" }, "optionalDependencies": { - "@tailwindcss/oxide-android-arm64": "4.1.6", - 
"@tailwindcss/oxide-darwin-arm64": "4.1.6", - "@tailwindcss/oxide-darwin-x64": "4.1.6", - "@tailwindcss/oxide-freebsd-x64": "4.1.6", - "@tailwindcss/oxide-linux-arm-gnueabihf": "4.1.6", - "@tailwindcss/oxide-linux-arm64-gnu": "4.1.6", - "@tailwindcss/oxide-linux-arm64-musl": "4.1.6", - "@tailwindcss/oxide-linux-x64-gnu": "4.1.6", - "@tailwindcss/oxide-linux-x64-musl": "4.1.6", - "@tailwindcss/oxide-wasm32-wasi": "4.1.6", - "@tailwindcss/oxide-win32-arm64-msvc": "4.1.6", - "@tailwindcss/oxide-win32-x64-msvc": "4.1.6" + "@tailwindcss/oxide-android-arm64": "4.1.14", + "@tailwindcss/oxide-darwin-arm64": "4.1.14", + "@tailwindcss/oxide-darwin-x64": "4.1.14", + "@tailwindcss/oxide-freebsd-x64": "4.1.14", + "@tailwindcss/oxide-linux-arm-gnueabihf": "4.1.14", + "@tailwindcss/oxide-linux-arm64-gnu": "4.1.14", + "@tailwindcss/oxide-linux-arm64-musl": "4.1.14", + "@tailwindcss/oxide-linux-x64-gnu": "4.1.14", + "@tailwindcss/oxide-linux-x64-musl": "4.1.14", + "@tailwindcss/oxide-wasm32-wasi": "4.1.14", + "@tailwindcss/oxide-win32-arm64-msvc": "4.1.14", + "@tailwindcss/oxide-win32-x64-msvc": "4.1.14" } }, "node_modules/@tailwindcss/oxide-android-arm64": { - "version": "4.1.6", - "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-android-arm64/-/oxide-android-arm64-4.1.6.tgz", - "integrity": "sha512-VHwwPiwXtdIvOvqT/0/FLH/pizTVu78FOnI9jQo64kSAikFSZT7K4pjyzoDpSMaveJTGyAKvDjuhxJxKfmvjiQ==", + "version": "4.1.14", + "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-android-arm64/-/oxide-android-arm64-4.1.14.tgz", + "integrity": "sha512-a94ifZrGwMvbdeAxWoSuGcIl6/DOP5cdxagid7xJv6bwFp3oebp7y2ImYsnZBMTwjn5Ev5xESvS3FFYUGgPODQ==", "cpu": [ "arm64" ], @@ -3268,9 +3261,9 @@ } }, "node_modules/@tailwindcss/oxide-darwin-arm64": { - "version": "4.1.6", - "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-darwin-arm64/-/oxide-darwin-arm64-4.1.6.tgz", - "integrity": "sha512-weINOCcqv1HVBIGptNrk7c6lWgSFFiQMcCpKM4tnVi5x8OY2v1FrV76jwLukfT6pL1hyajc06tyVmZFYXoxvhQ==", + "version": "4.1.14", + "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-darwin-arm64/-/oxide-darwin-arm64-4.1.14.tgz", + "integrity": "sha512-HkFP/CqfSh09xCnrPJA7jud7hij5ahKyWomrC3oiO2U9i0UjP17o9pJbxUN0IJ471GTQQmzwhp0DEcpbp4MZTA==", "cpu": [ "arm64" ], @@ -3285,9 +3278,9 @@ } }, "node_modules/@tailwindcss/oxide-darwin-x64": { - "version": "4.1.6", - "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-darwin-x64/-/oxide-darwin-x64-4.1.6.tgz", - "integrity": "sha512-3FzekhHG0ww1zQjQ1lPoq0wPrAIVXAbUkWdWM8u5BnYFZgb9ja5ejBqyTgjpo5mfy0hFOoMnMuVDI+7CXhXZaQ==", + "version": "4.1.14", + "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-darwin-x64/-/oxide-darwin-x64-4.1.14.tgz", + "integrity": "sha512-eVNaWmCgdLf5iv6Qd3s7JI5SEFBFRtfm6W0mphJYXgvnDEAZ5sZzqmI06bK6xo0IErDHdTA5/t7d4eTfWbWOFw==", "cpu": [ "x64" ], @@ -3302,9 +3295,9 @@ } }, "node_modules/@tailwindcss/oxide-freebsd-x64": { - "version": "4.1.6", - "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-freebsd-x64/-/oxide-freebsd-x64-4.1.6.tgz", - "integrity": "sha512-4m5F5lpkBZhVQJq53oe5XgJ+aFYWdrgkMwViHjRsES3KEu2m1udR21B1I77RUqie0ZYNscFzY1v9aDssMBZ/1w==", + "version": "4.1.14", + "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-freebsd-x64/-/oxide-freebsd-x64-4.1.14.tgz", + "integrity": "sha512-QWLoRXNikEuqtNb0dhQN6wsSVVjX6dmUFzuuiL09ZeXju25dsei2uIPl71y2Ic6QbNBsB4scwBoFnlBfabHkEw==", "cpu": [ "x64" ], @@ -3319,9 +3312,9 @@ } }, "node_modules/@tailwindcss/oxide-linux-arm-gnueabihf": { - "version": "4.1.6", - "resolved": 
"https://registry.npmjs.org/@tailwindcss/oxide-linux-arm-gnueabihf/-/oxide-linux-arm-gnueabihf-4.1.6.tgz", - "integrity": "sha512-qU0rHnA9P/ZoaDKouU1oGPxPWzDKtIfX7eOGi5jOWJKdxieUJdVV+CxWZOpDWlYTd4N3sFQvcnVLJWJ1cLP5TA==", + "version": "4.1.14", + "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-linux-arm-gnueabihf/-/oxide-linux-arm-gnueabihf-4.1.14.tgz", + "integrity": "sha512-VB4gjQni9+F0VCASU+L8zSIyjrLLsy03sjcR3bM0V2g4SNamo0FakZFKyUQ96ZVwGK4CaJsc9zd/obQy74o0Fw==", "cpu": [ "arm" ], @@ -3336,9 +3329,9 @@ } }, "node_modules/@tailwindcss/oxide-linux-arm64-gnu": { - "version": "4.1.6", - "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-linux-arm64-gnu/-/oxide-linux-arm64-gnu-4.1.6.tgz", - "integrity": "sha512-jXy3TSTrbfgyd3UxPQeXC3wm8DAgmigzar99Km9Sf6L2OFfn/k+u3VqmpgHQw5QNfCpPe43em6Q7V76Wx7ogIQ==", + "version": "4.1.14", + "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-linux-arm64-gnu/-/oxide-linux-arm64-gnu-4.1.14.tgz", + "integrity": "sha512-qaEy0dIZ6d9vyLnmeg24yzA8XuEAD9WjpM5nIM1sUgQ/Zv7cVkharPDQcmm/t/TvXoKo/0knI3me3AGfdx6w1w==", "cpu": [ "arm64" ], @@ -3353,9 +3346,9 @@ } }, "node_modules/@tailwindcss/oxide-linux-arm64-musl": { - "version": "4.1.6", - "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-linux-arm64-musl/-/oxide-linux-arm64-musl-4.1.6.tgz", - "integrity": "sha512-8kjivE5xW0qAQ9HX9reVFmZj3t+VmljDLVRJpVBEoTR+3bKMnvC7iLcoSGNIUJGOZy1mLVq7x/gerVg0T+IsYw==", + "version": "4.1.14", + "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-linux-arm64-musl/-/oxide-linux-arm64-musl-4.1.14.tgz", + "integrity": "sha512-ISZjT44s59O8xKsPEIesiIydMG/sCXoMBCqsphDm/WcbnuWLxxb+GcvSIIA5NjUw6F8Tex7s5/LM2yDy8RqYBQ==", "cpu": [ "arm64" ], @@ -3370,9 +3363,9 @@ } }, "node_modules/@tailwindcss/oxide-linux-x64-gnu": { - "version": "4.1.6", - "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-linux-x64-gnu/-/oxide-linux-x64-gnu-4.1.6.tgz", - "integrity": "sha512-A4spQhwnWVpjWDLXnOW9PSinO2PTKJQNRmL/aIl2U/O+RARls8doDfs6R41+DAXK0ccacvRyDpR46aVQJJCoCg==", + "version": "4.1.14", + "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-linux-x64-gnu/-/oxide-linux-x64-gnu-4.1.14.tgz", + "integrity": "sha512-02c6JhLPJj10L2caH4U0zF8Hji4dOeahmuMl23stk0MU1wfd1OraE7rOloidSF8W5JTHkFdVo/O7uRUJJnUAJg==", "cpu": [ "x64" ], @@ -3387,9 +3380,9 @@ } }, "node_modules/@tailwindcss/oxide-linux-x64-musl": { - "version": "4.1.6", - "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-linux-x64-musl/-/oxide-linux-x64-musl-4.1.6.tgz", - "integrity": "sha512-YRee+6ZqdzgiQAHVSLfl3RYmqeeaWVCk796MhXhLQu2kJu2COHBkqlqsqKYx3p8Hmk5pGCQd2jTAoMWWFeyG2A==", + "version": "4.1.14", + "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-linux-x64-musl/-/oxide-linux-x64-musl-4.1.14.tgz", + "integrity": "sha512-TNGeLiN1XS66kQhxHG/7wMeQDOoL0S33x9BgmydbrWAb9Qw0KYdd8o1ifx4HOGDWhVmJ+Ul+JQ7lyknQFilO3Q==", "cpu": [ "x64" ], @@ -3404,9 +3397,9 @@ } }, "node_modules/@tailwindcss/oxide-wasm32-wasi": { - "version": "4.1.6", - "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-wasm32-wasi/-/oxide-wasm32-wasi-4.1.6.tgz", - "integrity": "sha512-qAp4ooTYrBQ5pk5jgg54/U1rCJ/9FLYOkkQ/nTE+bVMseMfB6O7J8zb19YTpWuu4UdfRf5zzOrNKfl6T64MNrQ==", + "version": "4.1.14", + "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-wasm32-wasi/-/oxide-wasm32-wasi-4.1.14.tgz", + "integrity": "sha512-uZYAsaW/jS/IYkd6EWPJKW/NlPNSkWkBlaeVBi/WsFQNP05/bzkebUL8FH1pdsqx4f2fH/bWFcUABOM9nfiJkQ==", "bundleDependencies": [ "@napi-rs/wasm-runtime", "@emnapi/core", @@ -3422,21 +3415,81 @@ "license": "MIT", 
"optional": true, "dependencies": { - "@emnapi/core": "^1.4.3", - "@emnapi/runtime": "^1.4.3", - "@emnapi/wasi-threads": "^1.0.2", - "@napi-rs/wasm-runtime": "^0.2.9", - "@tybys/wasm-util": "^0.9.0", - "tslib": "^2.8.0" + "@emnapi/core": "^1.5.0", + "@emnapi/runtime": "^1.5.0", + "@emnapi/wasi-threads": "^1.1.0", + "@napi-rs/wasm-runtime": "^1.0.5", + "@tybys/wasm-util": "^0.10.1", + "tslib": "^2.4.0" }, "engines": { "node": ">=14.0.0" } }, + "node_modules/@tailwindcss/oxide-wasm32-wasi/node_modules/@emnapi/core": { + "version": "1.5.0", + "dev": true, + "inBundle": true, + "license": "MIT", + "optional": true, + "dependencies": { + "@emnapi/wasi-threads": "1.1.0", + "tslib": "^2.4.0" + } + }, + "node_modules/@tailwindcss/oxide-wasm32-wasi/node_modules/@emnapi/runtime": { + "version": "1.5.0", + "dev": true, + "inBundle": true, + "license": "MIT", + "optional": true, + "dependencies": { + "tslib": "^2.4.0" + } + }, + "node_modules/@tailwindcss/oxide-wasm32-wasi/node_modules/@emnapi/wasi-threads": { + "version": "1.1.0", + "dev": true, + "inBundle": true, + "license": "MIT", + "optional": true, + "dependencies": { + "tslib": "^2.4.0" + } + }, + "node_modules/@tailwindcss/oxide-wasm32-wasi/node_modules/@napi-rs/wasm-runtime": { + "version": "1.0.5", + "dev": true, + "inBundle": true, + "license": "MIT", + "optional": true, + "dependencies": { + "@emnapi/core": "^1.5.0", + "@emnapi/runtime": "^1.5.0", + "@tybys/wasm-util": "^0.10.1" + } + }, + "node_modules/@tailwindcss/oxide-wasm32-wasi/node_modules/@tybys/wasm-util": { + "version": "0.10.1", + "dev": true, + "inBundle": true, + "license": "MIT", + "optional": true, + "dependencies": { + "tslib": "^2.4.0" + } + }, + "node_modules/@tailwindcss/oxide-wasm32-wasi/node_modules/tslib": { + "version": "2.8.1", + "dev": true, + "inBundle": true, + "license": "0BSD", + "optional": true + }, "node_modules/@tailwindcss/oxide-win32-arm64-msvc": { - "version": "4.1.6", - "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-win32-arm64-msvc/-/oxide-win32-arm64-msvc-4.1.6.tgz", - "integrity": "sha512-nqpDWk0Xr8ELO/nfRUDjk1pc9wDJ3ObeDdNMHLaymc4PJBWj11gdPCWZFKSK2AVKjJQC7J2EfmSmf47GN7OuLg==", + "version": "4.1.14", + "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-win32-arm64-msvc/-/oxide-win32-arm64-msvc-4.1.14.tgz", + "integrity": "sha512-Az0RnnkcvRqsuoLH2Z4n3JfAef0wElgzHD5Aky/e+0tBUxUhIeIqFBTMNQvmMRSP15fWwmvjBxZ3Q8RhsDnxAA==", "cpu": [ "arm64" ], @@ -3451,9 +3504,9 @@ } }, "node_modules/@tailwindcss/oxide-win32-x64-msvc": { - "version": "4.1.6", - "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-win32-x64-msvc/-/oxide-win32-x64-msvc-4.1.6.tgz", - "integrity": "sha512-5k9xF33xkfKpo9wCvYcegQ21VwIBU1/qEbYlVukfEIyQbEA47uK8AAwS7NVjNE3vHzcmxMYwd0l6L4pPjjm1rQ==", + "version": "4.1.14", + "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-win32-x64-msvc/-/oxide-win32-x64-msvc-4.1.14.tgz", + "integrity": "sha512-ttblVGHgf68kEE4om1n/n44I0yGPkCPbLsqzjvybhpwa6mKKtgFfAzy6btc3HRmuW7nHe0OOrSeNP9sQmmH9XA==", "cpu": [ "x64" ], @@ -3468,26 +3521,19 @@ } }, "node_modules/@tailwindcss/postcss": { - "version": "4.1.6", - "resolved": "https://registry.npmjs.org/@tailwindcss/postcss/-/postcss-4.1.6.tgz", - "integrity": "sha512-ELq+gDMBuRXPJlpE3PEen+1MhnHAQQrh2zF0dI1NXOlEWfr2qWf2CQdr5jl9yANv8RErQaQ2l6nIFO9OSCVq/g==", + "version": "4.1.14", + "resolved": "https://registry.npmjs.org/@tailwindcss/postcss/-/postcss-4.1.14.tgz", + "integrity": "sha512-BdMjIxy7HUNThK87C7BC8I1rE8BVUsfNQSI5siQ4JK3iIa3w0XyVvVL9SXLWO//CtYTcp1v7zci0fYwJOjB+Zg==", "dev": 
true, "license": "MIT", "dependencies": { "@alloc/quick-lru": "^5.2.0", - "@tailwindcss/node": "4.1.6", - "@tailwindcss/oxide": "4.1.6", + "@tailwindcss/node": "4.1.14", + "@tailwindcss/oxide": "4.1.14", "postcss": "^8.4.41", - "tailwindcss": "4.1.6" + "tailwindcss": "4.1.14" } }, - "node_modules/@tailwindcss/postcss/node_modules/tailwindcss": { - "version": "4.1.6", - "resolved": "https://registry.npmjs.org/tailwindcss/-/tailwindcss-4.1.6.tgz", - "integrity": "sha512-j0cGLTreM6u4OWzBeLBpycK0WIh8w7kSwcUsQZoGLHZ7xDTdM69lN64AgoIEEwFi0tnhs4wSykUa5YWxAzgFYg==", - "dev": true, - "license": "MIT" - }, "node_modules/@testing-library/dom": { "version": "10.4.1", "resolved": "https://registry.npmjs.org/@testing-library/dom/-/dom-10.4.1.tgz", @@ -3812,12 +3858,12 @@ "license": "MIT" }, "node_modules/@types/node": { - "version": "24.3.0", - "resolved": "https://registry.npmjs.org/@types/node/-/node-24.3.0.tgz", - "integrity": "sha512-aPTXCrfwnDLj4VvXrm+UUCQjNEvJgNA8s5F1cvwQU+3KNltTOkBm1j30uNLyqqPNe7gE3KFzImYoZEfLhp4Yow==", + "version": "24.8.1", + "resolved": "https://registry.npmjs.org/@types/node/-/node-24.8.1.tgz", + "integrity": "sha512-alv65KGRadQVfVcG69MuB4IzdYVpRwMG/mq8KWOaoOdyY617P5ivaDiMCGOFDWD2sAn5Q0mR3mRtUOgm99hL9Q==", "license": "MIT", "dependencies": { - "undici-types": "~7.10.0" + "undici-types": "~7.14.0" } }, "node_modules/@types/node-fetch": { @@ -5850,9 +5896,9 @@ "license": "MIT" }, "node_modules/enhanced-resolve": { - "version": "5.18.1", - "resolved": "https://registry.npmjs.org/enhanced-resolve/-/enhanced-resolve-5.18.1.tgz", - "integrity": "sha512-ZSW3ma5GkcQBIpwZTSRAI8N71Uuwgs93IezB7mf7R60tC8ZbJideoDNKjHn2O9KIlx6rkGTTEk1xUCK2E1Y2Yg==", + "version": "5.18.3", + "resolved": "https://registry.npmjs.org/enhanced-resolve/-/enhanced-resolve-5.18.3.tgz", + "integrity": "sha512-d4lC8xfavMeBjzGr2vECC3fsGXziXZQyJxD868h2M/mBI3PwAuODxAkLkq5HYuvrPYcUtiLzsTo8U3PgX3Ocww==", "dev": true, "license": "MIT", "dependencies": { @@ -9128,9 +9174,9 @@ } }, "node_modules/jiti": { - "version": "2.4.2", - "resolved": "https://registry.npmjs.org/jiti/-/jiti-2.4.2.tgz", - "integrity": "sha512-rg9zJN+G4n2nfJl5MW3BMygZX56zKPNVEYYqq7adpmMh4Jn2QNEwhvQlFy6jPVdcod7txZtKHWnyZiA3a0zP7A==", + "version": "2.6.1", + "resolved": "https://registry.npmjs.org/jiti/-/jiti-2.6.1.tgz", + "integrity": "sha512-ekilCSN1jwRvIbgeg/57YFh8qQDNbwDb9xT/qu2DAHbFFZUicIl4ygVaAvzveMhMVr3LnpSKTNnwt8PoOfmKhQ==", "dev": true, "license": "MIT", "bin": { @@ -9368,9 +9414,9 @@ } }, "node_modules/lightningcss": { - "version": "1.29.2", - "resolved": "https://registry.npmjs.org/lightningcss/-/lightningcss-1.29.2.tgz", - "integrity": "sha512-6b6gd/RUXKaw5keVdSEtqFVdzWnU5jMxTUjA2bVcMNPLwSQ08Sv/UodBVtETLCn7k4S1Ibxwh7k68IwLZPgKaA==", + "version": "1.30.1", + "resolved": "https://registry.npmjs.org/lightningcss/-/lightningcss-1.30.1.tgz", + "integrity": "sha512-xi6IyHML+c9+Q3W0S4fCQJOym42pyurFiJUHEcEyHS0CeKzia4yZDEsLlqOFykxOdHpNy0NmvVO31vcSqAxJCg==", "dev": true, "license": "MPL-2.0", "dependencies": { @@ -9384,22 +9430,22 @@ "url": "https://opencollective.com/parcel" }, "optionalDependencies": { - "lightningcss-darwin-arm64": "1.29.2", - "lightningcss-darwin-x64": "1.29.2", - "lightningcss-freebsd-x64": "1.29.2", - "lightningcss-linux-arm-gnueabihf": "1.29.2", - "lightningcss-linux-arm64-gnu": "1.29.2", - "lightningcss-linux-arm64-musl": "1.29.2", - "lightningcss-linux-x64-gnu": "1.29.2", - "lightningcss-linux-x64-musl": "1.29.2", - "lightningcss-win32-arm64-msvc": "1.29.2", - "lightningcss-win32-x64-msvc": "1.29.2" + "lightningcss-darwin-arm64": 
"1.30.1", + "lightningcss-darwin-x64": "1.30.1", + "lightningcss-freebsd-x64": "1.30.1", + "lightningcss-linux-arm-gnueabihf": "1.30.1", + "lightningcss-linux-arm64-gnu": "1.30.1", + "lightningcss-linux-arm64-musl": "1.30.1", + "lightningcss-linux-x64-gnu": "1.30.1", + "lightningcss-linux-x64-musl": "1.30.1", + "lightningcss-win32-arm64-msvc": "1.30.1", + "lightningcss-win32-x64-msvc": "1.30.1" } }, "node_modules/lightningcss-darwin-arm64": { - "version": "1.29.2", - "resolved": "https://registry.npmjs.org/lightningcss-darwin-arm64/-/lightningcss-darwin-arm64-1.29.2.tgz", - "integrity": "sha512-cK/eMabSViKn/PG8U/a7aCorpeKLMlK0bQeNHmdb7qUnBkNPnL+oV5DjJUo0kqWsJUapZsM4jCfYItbqBDvlcA==", + "version": "1.30.1", + "resolved": "https://registry.npmjs.org/lightningcss-darwin-arm64/-/lightningcss-darwin-arm64-1.30.1.tgz", + "integrity": "sha512-c8JK7hyE65X1MHMN+Viq9n11RRC7hgin3HhYKhrMyaXflk5GVplZ60IxyoVtzILeKr+xAJwg6zK6sjTBJ0FKYQ==", "cpu": [ "arm64" ], @@ -9418,9 +9464,9 @@ } }, "node_modules/lightningcss-darwin-x64": { - "version": "1.29.2", - "resolved": "https://registry.npmjs.org/lightningcss-darwin-x64/-/lightningcss-darwin-x64-1.29.2.tgz", - "integrity": "sha512-j5qYxamyQw4kDXX5hnnCKMf3mLlHvG44f24Qyi2965/Ycz829MYqjrVg2H8BidybHBp9kom4D7DR5VqCKDXS0w==", + "version": "1.30.1", + "resolved": "https://registry.npmjs.org/lightningcss-darwin-x64/-/lightningcss-darwin-x64-1.30.1.tgz", + "integrity": "sha512-k1EvjakfumAQoTfcXUcHQZhSpLlkAuEkdMBsI/ivWw9hL+7FtilQc0Cy3hrx0AAQrVtQAbMI7YjCgYgvn37PzA==", "cpu": [ "x64" ], @@ -9439,9 +9485,9 @@ } }, "node_modules/lightningcss-freebsd-x64": { - "version": "1.29.2", - "resolved": "https://registry.npmjs.org/lightningcss-freebsd-x64/-/lightningcss-freebsd-x64-1.29.2.tgz", - "integrity": "sha512-wDk7M2tM78Ii8ek9YjnY8MjV5f5JN2qNVO+/0BAGZRvXKtQrBC4/cn4ssQIpKIPP44YXw6gFdpUF+Ps+RGsCwg==", + "version": "1.30.1", + "resolved": "https://registry.npmjs.org/lightningcss-freebsd-x64/-/lightningcss-freebsd-x64-1.30.1.tgz", + "integrity": "sha512-kmW6UGCGg2PcyUE59K5r0kWfKPAVy4SltVeut+umLCFoJ53RdCUWxcRDzO1eTaxf/7Q2H7LTquFHPL5R+Gjyig==", "cpu": [ "x64" ], @@ -9460,9 +9506,9 @@ } }, "node_modules/lightningcss-linux-arm-gnueabihf": { - "version": "1.29.2", - "resolved": "https://registry.npmjs.org/lightningcss-linux-arm-gnueabihf/-/lightningcss-linux-arm-gnueabihf-1.29.2.tgz", - "integrity": "sha512-IRUrOrAF2Z+KExdExe3Rz7NSTuuJ2HvCGlMKoquK5pjvo2JY4Rybr+NrKnq0U0hZnx5AnGsuFHjGnNT14w26sg==", + "version": "1.30.1", + "resolved": "https://registry.npmjs.org/lightningcss-linux-arm-gnueabihf/-/lightningcss-linux-arm-gnueabihf-1.30.1.tgz", + "integrity": "sha512-MjxUShl1v8pit+6D/zSPq9S9dQ2NPFSQwGvxBCYaBYLPlCWuPh9/t1MRS8iUaR8i+a6w7aps+B4N0S1TYP/R+Q==", "cpu": [ "arm" ], @@ -9481,9 +9527,9 @@ } }, "node_modules/lightningcss-linux-arm64-gnu": { - "version": "1.29.2", - "resolved": "https://registry.npmjs.org/lightningcss-linux-arm64-gnu/-/lightningcss-linux-arm64-gnu-1.29.2.tgz", - "integrity": "sha512-KKCpOlmhdjvUTX/mBuaKemp0oeDIBBLFiU5Fnqxh1/DZ4JPZi4evEH7TKoSBFOSOV3J7iEmmBaw/8dpiUvRKlQ==", + "version": "1.30.1", + "resolved": "https://registry.npmjs.org/lightningcss-linux-arm64-gnu/-/lightningcss-linux-arm64-gnu-1.30.1.tgz", + "integrity": "sha512-gB72maP8rmrKsnKYy8XUuXi/4OctJiuQjcuqWNlJQ6jZiWqtPvqFziskH3hnajfvKB27ynbVCucKSm2rkQp4Bw==", "cpu": [ "arm64" ], @@ -9502,9 +9548,9 @@ } }, "node_modules/lightningcss-linux-arm64-musl": { - "version": "1.29.2", - "resolved": "https://registry.npmjs.org/lightningcss-linux-arm64-musl/-/lightningcss-linux-arm64-musl-1.29.2.tgz", - "integrity": 
"sha512-Q64eM1bPlOOUgxFmoPUefqzY1yV3ctFPE6d/Vt7WzLW4rKTv7MyYNky+FWxRpLkNASTnKQUaiMJ87zNODIrrKQ==", + "version": "1.30.1", + "resolved": "https://registry.npmjs.org/lightningcss-linux-arm64-musl/-/lightningcss-linux-arm64-musl-1.30.1.tgz", + "integrity": "sha512-jmUQVx4331m6LIX+0wUhBbmMX7TCfjF5FoOH6SD1CttzuYlGNVpA7QnrmLxrsub43ClTINfGSYyHe2HWeLl5CQ==", "cpu": [ "arm64" ], @@ -9523,9 +9569,9 @@ } }, "node_modules/lightningcss-linux-x64-gnu": { - "version": "1.29.2", - "resolved": "https://registry.npmjs.org/lightningcss-linux-x64-gnu/-/lightningcss-linux-x64-gnu-1.29.2.tgz", - "integrity": "sha512-0v6idDCPG6epLXtBH/RPkHvYx74CVziHo6TMYga8O2EiQApnUPZsbR9nFNrg2cgBzk1AYqEd95TlrsL7nYABQg==", + "version": "1.30.1", + "resolved": "https://registry.npmjs.org/lightningcss-linux-x64-gnu/-/lightningcss-linux-x64-gnu-1.30.1.tgz", + "integrity": "sha512-piWx3z4wN8J8z3+O5kO74+yr6ze/dKmPnI7vLqfSqI8bccaTGY5xiSGVIJBDd5K5BHlvVLpUB3S2YCfelyJ1bw==", "cpu": [ "x64" ], @@ -9544,9 +9590,9 @@ } }, "node_modules/lightningcss-linux-x64-musl": { - "version": "1.29.2", - "resolved": "https://registry.npmjs.org/lightningcss-linux-x64-musl/-/lightningcss-linux-x64-musl-1.29.2.tgz", - "integrity": "sha512-rMpz2yawkgGT8RULc5S4WiZopVMOFWjiItBT7aSfDX4NQav6M44rhn5hjtkKzB+wMTRlLLqxkeYEtQ3dd9696w==", + "version": "1.30.1", + "resolved": "https://registry.npmjs.org/lightningcss-linux-x64-musl/-/lightningcss-linux-x64-musl-1.30.1.tgz", + "integrity": "sha512-rRomAK7eIkL+tHY0YPxbc5Dra2gXlI63HL+v1Pdi1a3sC+tJTcFrHX+E86sulgAXeI7rSzDYhPSeHHjqFhqfeQ==", "cpu": [ "x64" ], @@ -9565,9 +9611,9 @@ } }, "node_modules/lightningcss-win32-arm64-msvc": { - "version": "1.29.2", - "resolved": "https://registry.npmjs.org/lightningcss-win32-arm64-msvc/-/lightningcss-win32-arm64-msvc-1.29.2.tgz", - "integrity": "sha512-nL7zRW6evGQqYVu/bKGK+zShyz8OVzsCotFgc7judbt6wnB2KbiKKJwBE4SGoDBQ1O94RjW4asrCjQL4i8Fhbw==", + "version": "1.30.1", + "resolved": "https://registry.npmjs.org/lightningcss-win32-arm64-msvc/-/lightningcss-win32-arm64-msvc-1.30.1.tgz", + "integrity": "sha512-mSL4rqPi4iXq5YVqzSsJgMVFENoa4nGTT/GjO2c0Yl9OuQfPsIfncvLrEW6RbbB24WtZ3xP/2CCmI3tNkNV4oA==", "cpu": [ "arm64" ], @@ -9586,9 +9632,9 @@ } }, "node_modules/lightningcss-win32-x64-msvc": { - "version": "1.29.2", - "resolved": "https://registry.npmjs.org/lightningcss-win32-x64-msvc/-/lightningcss-win32-x64-msvc-1.29.2.tgz", - "integrity": "sha512-EdIUW3B2vLuHmv7urfzMI/h2fmlnOQBk1xlsDxkN1tCWKjNFjfLhGxYk8C8mzpSfr+A6jFFIi8fU6LbQGsRWjA==", + "version": "1.30.1", + "resolved": "https://registry.npmjs.org/lightningcss-win32-x64-msvc/-/lightningcss-win32-x64-msvc-1.30.1.tgz", + "integrity": "sha512-PVqXh48wh4T53F/1CCu8PIPCxLzWyCnn/9T5W1Jpmdy5h9Cwd+0YQS6/LwhHXSafuc61/xg9Lv5OrCby6a++jg==", "cpu": [ "x64" ], @@ -9726,13 +9772,13 @@ } }, "node_modules/magic-string": { - "version": "0.30.17", - "resolved": "https://registry.npmjs.org/magic-string/-/magic-string-0.30.17.tgz", - "integrity": "sha512-sNPKHvyjVf7gyjwS4xGTaW/mCnF8wnjtifKBEhxfZ7E/S8tQ0rssrwGNn6q8JH/ohItJfSQp9mBtQYuTlH5QnA==", + "version": "0.30.19", + "resolved": "https://registry.npmjs.org/magic-string/-/magic-string-0.30.19.tgz", + "integrity": "sha512-2N21sPY9Ws53PZvsEpVtNuSW+ScYbQdp4b9qUaL+9QkHUrGFKo56Lg9Emg5s9V/qrtNBmiR01sYhUOwu3H+VOw==", "dev": true, "license": "MIT", "dependencies": { - "@jridgewell/sourcemap-codec": "^1.5.0" + "@jridgewell/sourcemap-codec": "^1.5.5" } }, "node_modules/make-dir": { @@ -10717,9 +10763,9 @@ } }, "node_modules/minizlib": { - "version": "3.0.2", - "resolved": 
"https://registry.npmjs.org/minizlib/-/minizlib-3.0.2.tgz", - "integrity": "sha512-oG62iEk+CYt5Xj2YqI5Xi9xWUeZhDI8jjQmC5oThVH5JGCTgIjr7ciJDzC7MBzYd//WvR1OTmP5Q38Q8ShQtVA==", + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/minizlib/-/minizlib-3.1.0.tgz", + "integrity": "sha512-KZxYo1BUkWD2TVFLr0MQoM8vUUigWD3LlD83a/75BqC+4qE0Hb1Vo5v1FgcfaNXvfXzr+5EhQ6ing/CaBijTlw==", "dev": true, "license": "MIT", "dependencies": { @@ -10729,22 +10775,6 @@ "node": ">= 18" } }, - "node_modules/mkdirp": { - "version": "3.0.1", - "resolved": "https://registry.npmjs.org/mkdirp/-/mkdirp-3.0.1.tgz", - "integrity": "sha512-+NsyUUAZDmo6YVHzL/stxSu3t9YS1iljliy3BSDrXJ/dkn1KYdmtZODGGjLcc9XLgVVpH4KshHB8XmZgMhaBXg==", - "dev": true, - "license": "MIT", - "bin": { - "mkdirp": "dist/cjs/src/bin.js" - }, - "engines": { - "node": ">=10" - }, - "funding": { - "url": "https://github.com/sponsors/isaacs" - } - }, "node_modules/motion-dom": { "version": "12.23.23", "resolved": "https://registry.npmjs.org/motion-dom/-/motion-dom-12.23.23.tgz", @@ -12989,34 +13019,37 @@ } }, "node_modules/tailwindcss": { - "version": "4.1.13", - "resolved": "https://registry.npmjs.org/tailwindcss/-/tailwindcss-4.1.13.tgz", - "integrity": "sha512-i+zidfmTqtwquj4hMEwdjshYYgMbOrPzb9a0M3ZgNa0JMoZeFC6bxZvO8yr8ozS6ix2SDz0+mvryPeBs2TFE+w==", + "version": "4.1.14", + "resolved": "https://registry.npmjs.org/tailwindcss/-/tailwindcss-4.1.14.tgz", + "integrity": "sha512-b7pCxjGO98LnxVkKjaZSDeNuljC4ueKUddjENJOADtubtdo8llTaJy7HwBMeLNSSo2N5QIAgklslK1+Ir8r6CA==", "dev": true, "license": "MIT" }, "node_modules/tapable": { - "version": "2.2.1", - "resolved": "https://registry.npmjs.org/tapable/-/tapable-2.2.1.tgz", - "integrity": "sha512-GNzQvQTOIP6RyTfE2Qxb8ZVlNmw0n88vp1szwWRimP02mnTsx3Wtn5qRdqY9w2XduFNUgvOwhNnQsjwCp+kqaQ==", + "version": "2.3.0", + "resolved": "https://registry.npmjs.org/tapable/-/tapable-2.3.0.tgz", + "integrity": "sha512-g9ljZiwki/LfxmQADO3dEY1CbpmXT5Hm2fJ+QaGKwSXUylMybePR7/67YW7jOrrvjEgL1Fmz5kzyAjWVWLlucg==", "dev": true, "license": "MIT", "engines": { "node": ">=6" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/webpack" } }, "node_modules/tar": { - "version": "7.4.3", - "resolved": "https://registry.npmjs.org/tar/-/tar-7.4.3.tgz", - "integrity": "sha512-5S7Va8hKfV7W5U6g3aYxXmlPoZVAwUMy9AOKyF2fVuZa2UD3qZjg578OrLRt8PcNN1PleVaL/5/yYATNL0ICUw==", + "version": "7.5.1", + "resolved": "https://registry.npmjs.org/tar/-/tar-7.5.1.tgz", + "integrity": "sha512-nlGpxf+hv0v7GkWBK2V9spgactGOp0qvfWRxUMjqHyzrt3SgwE48DIv/FhqPHJYLHpgW1opq3nERbz5Anq7n1g==", "dev": true, "license": "ISC", "dependencies": { "@isaacs/fs-minipass": "^4.0.0", "chownr": "^3.0.0", "minipass": "^7.1.2", - "minizlib": "^3.0.1", - "mkdirp": "^3.0.1", + "minizlib": "^3.1.0", "yallist": "^5.0.0" }, "engines": { @@ -13418,9 +13451,9 @@ } }, "node_modules/undici-types": { - "version": "7.10.0", - "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-7.10.0.tgz", - "integrity": "sha512-t5Fy/nfn+14LuOc2KNYg75vZqClpAiqscVvMygNnlsHBFpSXdJaYtXMcdNLpl/Qvc3P2cB3s6lOV51nqsFq4ag==", + "version": "7.14.0", + "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-7.14.0.tgz", + "integrity": "sha512-QQiYxHuyZ9gQUIrmPo3IA+hUl4KYk8uSA7cHrcKd/l3p1OTpZcM0Tbp9x7FAtXdAYhlasd60ncPpgu6ihG6TOA==", "license": "MIT" }, "node_modules/unified": { diff --git a/scripts/docker.sh b/scripts/docker.sh index 1ba1d9adf..a0690c8a9 100755 --- a/scripts/docker.sh +++ b/scripts/docker.sh @@ -156,6 +156,16 @@ DISTRO=$(echo "$DISTRO" | sed 
's/^docker://') CONTAINER_NAME="llama-stack-test-$DISTRO" +should_copy_source() { + if [[ "$USE_COPY_NOT_MOUNT" == "true" ]]; then + return 0 + fi + if [[ "${CI:-false}" == "true" ]] || [[ "${GITHUB_ACTIONS:-false}" == "true" ]]; then + return 0 + fi + return 1 +} + # Function to check if container is running is_container_running() { docker ps --filter "name=^${CONTAINER_NAME}$" --format '{{.Names}}' | grep -q "^${CONTAINER_NAME}$" @@ -183,20 +193,29 @@ stop_container() { build_image() { echo "=== Building Docker Image for distribution: $DISTRO ===" # Get the repo root (parent of scripts directory) - SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) - REPO_ROOT=$(cd "$SCRIPT_DIR/.." && pwd) + local script_dir + script_dir=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) + local repo_root + repo_root=$(cd "$script_dir/.." && pwd) - # Determine whether to copy or mount source - # Copy in CI or if explicitly requested, otherwise mount for live development - BUILD_ENV="LLAMA_STACK_DIR=$REPO_ROOT" - if [[ "$USE_COPY_NOT_MOUNT" == "true" ]] || [[ "${CI:-false}" == "true" ]] || [[ "${GITHUB_ACTIONS:-false}" == "true" ]]; then - echo "Copying source into image (USE_COPY_NOT_MOUNT=true, CI=${CI:-false}, GITHUB_ACTIONS=${GITHUB_ACTIONS:-false})" - BUILD_ENV="USE_COPY_NOT_MOUNT=true $BUILD_ENV" - else - echo "Will mount source for live development" + local containerfile="$repo_root/containers/Containerfile" + if [[ ! -f "$containerfile" ]]; then + echo "❌ Containerfile not found at $containerfile" + exit 1 fi - if ! eval "$BUILD_ENV llama stack build --distro '$DISTRO' --image-type container"; then + local build_cmd=( + docker + build + "$repo_root" + -f "$containerfile" + --tag "localhost/distribution-$DISTRO:dev" + --build-arg "DISTRO_NAME=$DISTRO" + --build-arg "INSTALL_MODE=editable" + --build-arg "LLAMA_STACK_DIR=/workspace" + ) + + if ! "${build_cmd[@]}"; then echo "❌ Failed to build Docker image" exit 1 fi @@ -224,7 +243,7 @@ start_container() { # Check if image exists (with or without localhost/ prefix) if ! docker images --format "{{.Repository}}:{{.Tag}}" | grep -q "distribution-$DISTRO:dev$"; then echo "❌ Error: Image distribution-$DISTRO:dev does not exist" - echo "Either build it first without --no-rebuild, or run: llama stack build --distro $DISTRO --image-type container" + echo "Either build it first without --no-rebuild, or run: docker build . -f containers/Containerfile --build-arg DISTRO_NAME=$DISTRO --tag localhost/distribution-$DISTRO:dev" exit 1 fi echo "✅ Found existing image for distribution-$DISTRO:dev" @@ -236,8 +255,10 @@ start_container() { echo "=== Starting Docker Container ===" # Get the repo root for volume mount - SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) - REPO_ROOT=$(cd "$SCRIPT_DIR/.." && pwd) + local script_dir + script_dir=$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" && pwd) + local repo_root + repo_root=$(cd "$script_dir/.." 
&& pwd) # Determine the actual image name (may have localhost/ prefix) IMAGE_NAME=$(docker images --format "{{.Repository}}:{{.Tag}}" | grep "distribution-$DISTRO:dev$" | head -1) @@ -279,10 +300,18 @@ start_container() { NETWORK_MODE="--network host" fi + local -a source_mount=() + if should_copy_source; then + echo "Source baked into image (no volume mount)" + else + source_mount=(-v "$repo_root":/workspace) + echo "Mounting $repo_root into /workspace" + fi + docker run -d $NETWORK_MODE --name "$CONTAINER_NAME" \ -p $PORT:$PORT \ $DOCKER_ENV_VARS \ - -v "$REPO_ROOT":/app/llama-stack-source \ + "${source_mount[@]}" \ "$IMAGE_NAME" \ --port $PORT diff --git a/scripts/integration-tests.sh b/scripts/integration-tests.sh index e19a5cc55..93739052b 100755 --- a/scripts/integration-tests.sh +++ b/scripts/integration-tests.sh @@ -238,6 +238,8 @@ if [[ "$STACK_CONFIG" == *"docker:"* && "$COLLECT_ONLY" == false ]]; then echo "Stopping Docker container..." container_name="llama-stack-test-$DISTRO" if docker ps -a --format '{{.Names}}' | grep -q "^${container_name}$"; then + echo "Dumping container logs before stopping..." + docker logs "$container_name" > "docker-${DISTRO}-${INFERENCE_MODE}.log" 2>&1 || true echo "Stopping and removing container: $container_name" docker stop "$container_name" 2>/dev/null || true docker rm "$container_name" 2>/dev/null || true @@ -252,19 +254,24 @@ if [[ "$STACK_CONFIG" == *"docker:"* && "$COLLECT_ONLY" == false ]]; then export LLAMA_STACK_PORT=8321 echo "=== Building Docker Image for distribution: $DISTRO ===" - # Set LLAMA_STACK_DIR to repo root - # USE_COPY_NOT_MOUNT copies files into image (for CI), otherwise mounts for live development - BUILD_ENV="LLAMA_STACK_DIR=$ROOT_DIR" - if [[ "${CI:-false}" == "true" ]] || [[ "${GITHUB_ACTIONS:-false}" == "true" ]]; then - echo "CI detected (CI=$CI, GITHUB_ACTIONS=$GITHUB_ACTIONS): copying source into image" - BUILD_ENV="USE_COPY_NOT_MOUNT=true $BUILD_ENV" - else - echo "Local mode: will mount source for live development" + containerfile="$ROOT_DIR/containers/Containerfile" + if [[ ! -f "$containerfile" ]]; then + echo "❌ Containerfile not found at $containerfile" + exit 1 fi - eval "$BUILD_ENV llama stack build --distro '$DISTRO' --image-type container" + build_cmd=( + docker + build + "$ROOT_DIR" + -f "$containerfile" + --tag "localhost/distribution-$DISTRO:dev" + --build-arg "DISTRO_NAME=$DISTRO" + --build-arg "INSTALL_MODE=editable" + --build-arg "LLAMA_STACK_DIR=/workspace" + ) - if !
"${build_cmd[@]}"; then echo "❌ Failed to build Docker image" exit 1 fi @@ -304,7 +311,6 @@ if [[ "$STACK_CONFIG" == *"docker:"* && "$COLLECT_ONLY" == false ]]; then docker run -d --network host --name "$container_name" \ -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ $DOCKER_ENV_VARS \ - -v $ROOT_DIR:/app/llama-stack-source \ "$IMAGE_NAME" \ --port $LLAMA_STACK_PORT @@ -404,6 +410,21 @@ elif [ $exit_code -eq 5 ]; then echo "⚠️ No tests collected (pattern matched no tests)" else echo "❌ Tests failed" + echo "" + echo "=== Dumping last 100 lines of logs for debugging ===" + + # Output server or container logs based on stack config + if [[ "$STACK_CONFIG" == *"server:"* && -f "server.log" ]]; then + echo "--- Last 100 lines of server.log ---" + tail -100 server.log + elif [[ "$STACK_CONFIG" == *"docker:"* ]]; then + docker_log_file="docker-${DISTRO}-${INFERENCE_MODE}.log" + if [[ -f "$docker_log_file" ]]; then + echo "--- Last 100 lines of $docker_log_file ---" + tail -100 "$docker_log_file" + fi + fi + exit 1 fi diff --git a/tests/external/run-byoa.yaml b/tests/external/run-byoa.yaml index 5774ae9da..4d63046c6 100644 --- a/tests/external/run-byoa.yaml +++ b/tests/external/run-byoa.yaml @@ -7,6 +7,24 @@ providers: - provider_id: kaze provider_type: remote::kaze config: {} +storage: + backends: + kv_default: + type: kv_sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/external}/kvstore.db + sql_default: + type: sql_sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/external}/sql_store.db + stores: + metadata: + namespace: registry + backend: kv_default + inference: + table_name: inference_store + backend: sql_default + conversations: + table_name: openai_conversations + backend: sql_default external_apis_dir: ~/.llama/apis.d external_providers_dir: ~/.llama/providers.d server: diff --git a/tests/integration/agents/recordings/00f8a71ccb939737ed72a289eede62998c6882727519858bbd5954307d10a673.json b/tests/integration/agents/recordings/00f8a71ccb939737ed72a289eede62998c6882727519858bbd5954307d10a673.json index 4d4331740..067b7d254 100644 --- a/tests/integration/agents/recordings/00f8a71ccb939737ed72a289eede62998c6882727519858bbd5954307d10a673.json +++ b/tests/integration/agents/recordings/00f8a71ccb939737ed72a289eede62998c6882727519858bbd5954307d10a673.json @@ -548,5 +548,6 @@ } ], "is_streaming": true - } + }, + "id_normalization_mapping": {} } diff --git a/tests/integration/agents/recordings/0940d1521204120ff9687b8ad6bf54c271f879db8b5a6ce62848b86a43bc49e4.json b/tests/integration/agents/recordings/0940d1521204120ff9687b8ad6bf54c271f879db8b5a6ce62848b86a43bc49e4.json new file mode 100644 index 000000000..aa61b7dbe --- /dev/null +++ b/tests/integration/agents/recordings/0940d1521204120ff9687b8ad6bf54c271f879db8b5a6ce62848b86a43bc49e4.json @@ -0,0 +1,447 @@ +{ + "test_id": "tests/integration/agents/test_agents.py::test_tool_choice_get_boiling_point[ollama/llama3.2:3b-instruct-fp16]", + "request": { + "method": "POST", + "url": "http://0.0.0.0:11434/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "llama3.2:3b-instruct-fp16", + "messages": [ + { + "role": "system", + "content": "You are a helpful assistant" + }, + { + "role": "user", + "content": "What is the boiling point of the liquid polyjuice in celsius?" 
+ }, + { + "role": "assistant", + "content": "", + "tool_calls": [ + { + "id": "call_5qverjg6", + "type": "function", + "function": { + "name": "get_boiling_point", + "arguments": "{\"celcius\":true,\"liquid_name\":\"polyjuice\"}" + } + } + ] + }, + { + "role": "tool", + "tool_call_id": "call_5qverjg6", + "content": "-100" + } + ], + "max_tokens": 512, + "stream": true, + "temperature": 0.0001, + "tool_choice": { + "type": "function", + "function": { + "name": "get_boiling_point" + } + }, + "tools": [ + { + "type": "function", + "function": { + "name": "get_boiling_point", + "description": "Returns the boiling point of a liquid in Celcius or Fahrenheit.", + "parameters": { + "type": "object", + "properties": { + "liquid_name": { + "type": "string", + "description": "The name of the liquid" + }, + "celcius": { + "type": "boolean", + "description": "Whether to return the boiling point in Celcius" + } + }, + "required": [ + "liquid_name" + ] + } + } + } + ], + "top_p": 0.9 + }, + "endpoint": "/v1/chat/completions", + "model": "llama3.2:3b-instruct-fp16" + }, + "response": { + "body": [ + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0940d1521204", + "choices": [ + { + "delta": { + "content": "The", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0940d1521204", + "choices": [ + { + "delta": { + "content": " boiling", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0940d1521204", + "choices": [ + { + "delta": { + "content": " point", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0940d1521204", + "choices": [ + { + "delta": { + "content": " of", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0940d1521204", + "choices": [ + { + "delta": { + "content": " liquid", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": 
"llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0940d1521204", + "choices": [ + { + "delta": { + "content": " poly", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0940d1521204", + "choices": [ + { + "delta": { + "content": "ju", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0940d1521204", + "choices": [ + { + "delta": { + "content": "ice", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0940d1521204", + "choices": [ + { + "delta": { + "content": " is", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0940d1521204", + "choices": [ + { + "delta": { + "content": " -", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0940d1521204", + "choices": [ + { + "delta": { + "content": "100", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0940d1521204", + "choices": [ + { + "delta": { + "content": "\u00b0C", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + 
"index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0940d1521204", + "choices": [ + { + "delta": { + "content": ".", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0940d1521204", + "choices": [ + { + "delta": { + "content": "", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": "stop", + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + } + ], + "is_streaming": true + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/agents/recordings/0f5443c07d1568fd139b8f3ea0aaa3de23d22b30f353c8ed7e6cfd033d904e04.json b/tests/integration/agents/recordings/0f5443c07d1568fd139b8f3ea0aaa3de23d22b30f353c8ed7e6cfd033d904e04.json new file mode 100644 index 000000000..3cf297c34 --- /dev/null +++ b/tests/integration/agents/recordings/0f5443c07d1568fd139b8f3ea0aaa3de23d22b30f353c8ed7e6cfd033d904e04.json @@ -0,0 +1,888 @@ +{ + "test_id": "tests/integration/agents/test_openai_responses.py::test_response_with_instructions[txt=ollama/llama3.2:3b-instruct-fp16]", + "request": { + "method": "POST", + "url": "http://0.0.0.0:11434/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "llama3.2:3b-instruct-fp16", + "messages": [ + { + "role": "system", + "content": "You are a helpful assistant and speak in pirate language." + }, + { + "role": "user", + "content": "What is the capital of France?" + }, + { + "role": "assistant", + "content": "The capital of France is Paris." 
+ } + ], + "stream": true, + "stream_options": { + "include_usage": true + } + }, + "endpoint": "/v1/chat/completions", + "model": "llama3.2:3b-instruct-fp16" + }, + "response": { + "body": [ + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0f5443c07d15", + "choices": [ + { + "delta": { + "content": " Yer", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0f5443c07d15", + "choices": [ + { + "delta": { + "content": " look", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0f5443c07d15", + "choices": [ + { + "delta": { + "content": "in", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0f5443c07d15", + "choices": [ + { + "delta": { + "content": "'", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0f5443c07d15", + "choices": [ + { + "delta": { + "content": " fer", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0f5443c07d15", + "choices": [ + { + "delta": { + "content": " a", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0f5443c07d15", + "choices": [ + { + "delta": { + "content": " port", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null 
+ }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0f5443c07d15", + "choices": [ + { + "delta": { + "content": " o", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0f5443c07d15", + "choices": [ + { + "delta": { + "content": "'", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0f5443c07d15", + "choices": [ + { + "delta": { + "content": " call", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0f5443c07d15", + "choices": [ + { + "delta": { + "content": ",", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0f5443c07d15", + "choices": [ + { + "delta": { + "content": " eh", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0f5443c07d15", + "choices": [ + { + "delta": { + "content": "?", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0f5443c07d15", + "choices": [ + { + "delta": { + "content": " That", + "function_call": null, + 
"refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0f5443c07d15", + "choices": [ + { + "delta": { + "content": " be", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0f5443c07d15", + "choices": [ + { + "delta": { + "content": " the", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0f5443c07d15", + "choices": [ + { + "delta": { + "content": " one", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0f5443c07d15", + "choices": [ + { + "delta": { + "content": "!", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0f5443c07d15", + "choices": [ + { + "delta": { + "content": " Yer", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0f5443c07d15", + "choices": [ + { + "delta": { + "content": " won", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0f5443c07d15", + "choices": [ + { 
+ "delta": { + "content": "'t", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0f5443c07d15", + "choices": [ + { + "delta": { + "content": " go", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0f5443c07d15", + "choices": [ + { + "delta": { + "content": " astr", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0f5443c07d15", + "choices": [ + { + "delta": { + "content": "ay", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0f5443c07d15", + "choices": [ + { + "delta": { + "content": " with", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0f5443c07d15", + "choices": [ + { + "delta": { + "content": " that", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0f5443c07d15", + "choices": [ + { + "delta": { + "content": " answer", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": 
"openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0f5443c07d15", + "choices": [ + { + "delta": { + "content": ",", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0f5443c07d15", + "choices": [ + { + "delta": { + "content": " mate", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0f5443c07d15", + "choices": [ + { + "delta": { + "content": "y", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0f5443c07d15", + "choices": [ + { + "delta": { + "content": ".", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0f5443c07d15", + "choices": [ + { + "delta": { + "content": "", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": "stop", + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0f5443c07d15", + "choices": [], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": { + "completion_tokens": 32, + "prompt_tokens": 50, + "total_tokens": 82, + "completion_tokens_details": null, + "prompt_tokens_details": null + } + } + } + ], + "is_streaming": true + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/agents/recordings/13fac3724cd626a119153f60fa56bb54955fe0b10f5c4102b78e2d428b5ffc7a.json b/tests/integration/agents/recordings/13fac3724cd626a119153f60fa56bb54955fe0b10f5c4102b78e2d428b5ffc7a.json index d606edb37..7efea91ba 100644 --- a/tests/integration/agents/recordings/13fac3724cd626a119153f60fa56bb54955fe0b10f5c4102b78e2d428b5ffc7a.json +++ 
b/tests/integration/agents/recordings/13fac3724cd626a119153f60fa56bb54955fe0b10f5c4102b78e2d428b5ffc7a.json @@ -54,5 +54,6 @@ } }, "is_streaming": false - } + }, + "id_normalization_mapping": {} } diff --git a/tests/integration/agents/recordings/15b23045b5cdfc49228d58e4a082f8402f7c91d15e2240f855cc9b8b4e25352a.json b/tests/integration/agents/recordings/15b23045b5cdfc49228d58e4a082f8402f7c91d15e2240f855cc9b8b4e25352a.json new file mode 100644 index 000000000..b899e0c2d --- /dev/null +++ b/tests/integration/agents/recordings/15b23045b5cdfc49228d58e4a082f8402f7c91d15e2240f855cc9b8b4e25352a.json @@ -0,0 +1,256 @@ +{ + "test_id": "tests/integration/agents/test_openai_responses.py::test_response_with_instructions[txt=ollama/llama3.2:3b-instruct-fp16]", + "request": { + "method": "POST", + "url": "http://0.0.0.0:11434/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "llama3.2:3b-instruct-fp16", + "messages": [ + { + "role": "user", + "content": "What is the capital of France?" + } + ], + "stream": true, + "stream_options": { + "include_usage": true + } + }, + "endpoint": "/v1/chat/completions", + "model": "llama3.2:3b-instruct-fp16" + }, + "response": { + "body": [ + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-15b23045b5cd", + "choices": [ + { + "delta": { + "content": "The", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-15b23045b5cd", + "choices": [ + { + "delta": { + "content": " capital", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-15b23045b5cd", + "choices": [ + { + "delta": { + "content": " of", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-15b23045b5cd", + "choices": [ + { + "delta": { + "content": " France", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-15b23045b5cd", + "choices": [ + { + "delta": { + "content": " is", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": 
null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-15b23045b5cd", + "choices": [ + { + "delta": { + "content": " Paris", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-15b23045b5cd", + "choices": [ + { + "delta": { + "content": ".", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-15b23045b5cd", + "choices": [ + { + "delta": { + "content": "", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": "stop", + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-15b23045b5cd", + "choices": [], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": { + "completion_tokens": 8, + "prompt_tokens": 32, + "total_tokens": 40, + "completion_tokens_details": null, + "prompt_tokens_details": null + } + } + } + ], + "is_streaming": true + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/agents/recordings/1a0d3109cf92111ed4cb061a857dee0b99fa1e0b27934de1e6c5d29c03026626.json b/tests/integration/agents/recordings/1a0d3109cf92111ed4cb061a857dee0b99fa1e0b27934de1e6c5d29c03026626.json index b8b22f51d..407ac0655 100644 --- a/tests/integration/agents/recordings/1a0d3109cf92111ed4cb061a857dee0b99fa1e0b27934de1e6c5d29c03026626.json +++ b/tests/integration/agents/recordings/1a0d3109cf92111ed4cb061a857dee0b99fa1e0b27934de1e6c5d29c03026626.json @@ -54,5 +54,6 @@ } }, "is_streaming": false - } + }, + "id_normalization_mapping": {} } diff --git a/tests/integration/agents/recordings/1adb6f4621eaa9e5d350925c3fc8c34fbb3d0af4cf4307d4363ff570c260287b.json b/tests/integration/agents/recordings/1adb6f4621eaa9e5d350925c3fc8c34fbb3d0af4cf4307d4363ff570c260287b.json index 4d7a1d1e4..241fb6127 100644 --- a/tests/integration/agents/recordings/1adb6f4621eaa9e5d350925c3fc8c34fbb3d0af4cf4307d4363ff570c260287b.json +++ b/tests/integration/agents/recordings/1adb6f4621eaa9e5d350925c3fc8c34fbb3d0af4cf4307d4363ff570c260287b.json @@ -55,7 +55,7 @@ "choices": [ { "delta": { - "content": "'m", + "content": "'d", "function_call": null, "refusal": null, "role": "assistant", @@ -81,7 +81,7 @@ "choices": [ { "delta": { - 
"content": " not", + "content": " be", "function_call": null, "refusal": null, "role": "assistant", @@ -107,7 +107,7 @@ "choices": [ { "delta": { - "content": " able", + "content": " happy", "function_call": null, "refusal": null, "role": "assistant", @@ -159,7 +159,7 @@ "choices": [ { "delta": { - "content": " provide", + "content": " help", "function_call": null, "refusal": null, "role": "assistant", @@ -185,7 +185,7 @@ "choices": [ { "delta": { - "content": " real", + "content": " you", "function_call": null, "refusal": null, "role": "assistant", @@ -211,7 +211,59 @@ "choices": [ { "delta": { - "content": "-time", + "content": " with", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " the", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " current", "function_call": null, "refusal": null, "role": "assistant", @@ -282,6 +334,58 @@ "usage": null } }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " for", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " Tokyo", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, { "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": { @@ -393,189 +497,7 @@ "choices": [ { "delta": { - "content": " can", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-1adb6f4621ea", - "choices": [ - { - "delta": { - "content": " tell", - "function_call": null, - "refusal": null, - "role": 
"assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-1adb6f4621ea", - "choices": [ - { - "delta": { - "content": " you", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-1adb6f4621ea", - "choices": [ - { - "delta": { - "content": " that", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-1adb6f4621ea", - "choices": [ - { - "delta": { - "content": " Tokyo", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-1adb6f4621ea", - "choices": [ - { - "delta": { - "content": ",", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-1adb6f4621ea", - "choices": [ - { - "delta": { - "content": " Japan", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-1adb6f4621ea", - "choices": [ - { - "delta": { - "content": " has", + "content": "'m", "function_call": null, "refusal": null, "role": "assistant", @@ -627,7 +549,7 @@ "choices": [ { "delta": { - "content": " humid", + "content": " large", "function_call": null, "refusal": null, "role": "assistant", @@ -653,7 +575,7 @@ "choices": [ { "delta": { - "content": " subt", + "content": " language", "function_call": null, "refusal": null, "role": "assistant", @@ -679,7 +601,7 @@ "choices": [ { "delta": { - "content": "ropical", + "content": " model", "function_call": null, 
"refusal": null, "role": "assistant", @@ -705,7 +627,7 @@ "choices": [ { "delta": { - "content": " climate", + "content": ",", "function_call": null, "refusal": null, "role": "assistant", @@ -731,7 +653,7 @@ "choices": [ { "delta": { - "content": " with", + "content": " I", "function_call": null, "refusal": null, "role": "assistant", @@ -757,7 +679,7 @@ "choices": [ { "delta": { - "content": " hot", + "content": " don", "function_call": null, "refusal": null, "role": "assistant", @@ -783,7 +705,7 @@ "choices": [ { "delta": { - "content": " summers", + "content": "'t", "function_call": null, "refusal": null, "role": "assistant", @@ -809,7 +731,7 @@ "choices": [ { "delta": { - "content": " and", + "content": " have", "function_call": null, "refusal": null, "role": "assistant", @@ -835,7 +757,7 @@ "choices": [ { "delta": { - "content": " cold", + "content": " real", "function_call": null, "refusal": null, "role": "assistant", @@ -861,7 +783,7 @@ "choices": [ { "delta": { - "content": " winters", + "content": "-time", "function_call": null, "refusal": null, "role": "assistant", @@ -887,111 +809,7 @@ "choices": [ { "delta": { - "content": ".\n\n", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-1adb6f4621ea", - "choices": [ - { - "delta": { - "content": "If", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-1adb6f4621ea", - "choices": [ - { - "delta": { - "content": " you", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-1adb6f4621ea", - "choices": [ - { - "delta": { - "content": "'d", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-1adb6f4621ea", - "choices": [ - { - "delta": { - "content": " like", + "content": " access", "function_call": null, "refusal": null, "role": "assistant", @@ -1043,7 +861,449 @@ "choices": [ { "delta": { - "content": " know", + "content": " current", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": 
null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " weather", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " conditions", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": ".\n\n", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": "That", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " being", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " said", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": ",", + "function_call": null, + "refusal": null, + "role": "assistant", + 
"tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " I", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " can", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " suggest", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " some", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " ways", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " for", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " you", 
+ "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " to", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " find", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " out", "function_call": null, "refusal": null, "role": "assistant", @@ -1147,59 +1407,7 @@ "choices": [ { "delta": { - "content": " or", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-1adb6f4621ea", - "choices": [ - { - "delta": { - "content": " forecast", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-1adb6f4621ea", - "choices": [ - { - "delta": { - "content": " for", + "content": " in", "function_call": null, "refusal": null, "role": "assistant", @@ -1251,7 +1459,7 @@ "choices": [ { "delta": { - "content": ",", + "content": ":\n\n", "function_call": null, "refusal": null, "role": "assistant", @@ -1277,7 +1485,7 @@ "choices": [ { "delta": { - "content": " I", + "content": "1", "function_call": null, "refusal": null, "role": "assistant", @@ -1303,7 +1511,7 @@ "choices": [ { "delta": { - "content": " recommend", + "content": ".", "function_call": null, "refusal": null, "role": "assistant", @@ -1329,59 +1537,7 @@ "choices": [ { "delta": { - "content": " checking", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": 
"chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-1adb6f4621ea", - "choices": [ - { - "delta": { - "content": " a", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-1adb6f4621ea", - "choices": [ - { - "delta": { - "content": " reliable", + "content": " Check", "function_call": null, "refusal": null, "role": "assistant", @@ -1433,7 +1589,7 @@ "choices": [ { "delta": { - "content": " source", + "content": " weather", "function_call": null, "refusal": null, "role": "assistant", @@ -1459,7 +1615,7 @@ "choices": [ { "delta": { - "content": " such", + "content": " websites", "function_call": null, "refusal": null, "role": "assistant", @@ -1485,7 +1641,7 @@ "choices": [ { "delta": { - "content": " as", + "content": ":", "function_call": null, "refusal": null, "role": "assistant", @@ -1511,7 +1667,7 @@ "choices": [ { "delta": { - "content": ":\n\n", + "content": " You", "function_call": null, "refusal": null, "role": "assistant", @@ -1537,7 +1693,7 @@ "choices": [ { "delta": { - "content": "*", + "content": " can", "function_call": null, "refusal": null, "role": "assistant", @@ -1563,7 +1719,267 @@ "choices": [ { "delta": { - "content": " The", + "content": " check", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " websites", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " like", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " Acc", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": 
"chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": "u", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": "Weather", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": ",", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " Weather", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": ".com", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": ",", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " or", "function_call": null, "refusal": null, "role": "assistant", @@ -1797,7 +2213,397 @@ "choices": [ { "delta": { - "content": " website", + "content": " for", + 
"function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " the", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " current", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " weather", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " condition", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " and", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " forecast", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { 
+ "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " for", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " Tokyo", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": ".\n", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": "2", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": ".", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " Use", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " a", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": 
"openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " mobile", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " app", "function_call": null, "refusal": null, "role": "assistant", @@ -1849,7 +2655,7 @@ "choices": [ { "delta": { - "content": " \n", + "content": " provide", "function_call": null, "refusal": null, "role": "assistant", @@ -2057,7 +2863,7 @@ "choices": [ { "delta": { - "content": "*", + "content": " real", "function_call": null, "refusal": null, "role": "assistant", @@ -2083,7 +2889,7 @@ "choices": [ { "delta": { - "content": " Acc", + "content": "-time", "function_call": null, "refusal": null, "role": "assistant", @@ -2109,7 +2915,7 @@ "choices": [ { "delta": { - "content": "u", + "content": " weather", "function_call": null, "refusal": null, "role": "assistant", @@ -2135,7 +2941,7 @@ "choices": [ { "delta": { - "content": "Weather", + "content": " information", "function_call": null, "refusal": null, "role": "assistant", @@ -2161,7 +2967,7 @@ "choices": [ { "delta": { - "content": ":", + "content": ",", "function_call": null, "refusal": null, "role": "assistant", @@ -2187,7 +2993,7 @@ "choices": [ { "delta": { - "content": " https", + "content": " such", "function_call": null, "refusal": null, "role": "assistant", @@ -2213,189 +3019,7 @@ "choices": [ { "delta": { - "content": "://", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-1adb6f4621ea", - "choices": [ - { - "delta": { - "content": "www", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-1adb6f4621ea", - "choices": [ - { - "delta": { - "content": ".acc", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-1adb6f4621ea", - "choices": [ - { - "delta": { - "content": "u", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 
0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-1adb6f4621ea", - "choices": [ - { - "delta": { - "content": "weather", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-1adb6f4621ea", - "choices": [ - { - "delta": { - "content": ".com", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-1adb6f4621ea", - "choices": [ - { - "delta": { - "content": "/\n", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-1adb6f4621ea", - "choices": [ - { - "delta": { - "content": "*", + "content": " as", "function_call": null, "refusal": null, "role": "assistant", @@ -2466,6 +3090,240 @@ "usage": null } }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " or", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " Weather", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " Underground", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + 
"service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": ".\n", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": "3", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": ".", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " Check", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " social", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " media", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, { "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": { @@ -2499,7 +3357,7 @@ "choices": [ { "delta": { - "content": " https", + "content": " You", "function_call": null, "refusal": null, "role": "assistant", @@ -2525,7 +3383,7 @@ "choices": [ { "delta": { - "content": "://", + "content": " can", "function_call": null, "refusal": 
null, "role": "assistant", @@ -2551,7 +3409,7 @@ "choices": [ { "delta": { - "content": "dark", + "content": " also", "function_call": null, "refusal": null, "role": "assistant", @@ -2577,7 +3435,7 @@ "choices": [ { "delta": { - "content": "sky", + "content": " check", "function_call": null, "refusal": null, "role": "assistant", @@ -2603,7 +3461,7 @@ "choices": [ { "delta": { - "content": ".net", + "content": " social", "function_call": null, "refusal": null, "role": "assistant", @@ -2629,7 +3487,7 @@ "choices": [ { "delta": { - "content": "/\n\n", + "content": " media", "function_call": null, "refusal": null, "role": "assistant", @@ -2655,7 +3513,7 @@ "choices": [ { "delta": { - "content": "Please", + "content": " platforms", "function_call": null, "refusal": null, "role": "assistant", @@ -2681,7 +3539,215 @@ "choices": [ { "delta": { - "content": " keep", + "content": " like", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " Twitter", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " or", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " Facebook", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " for", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " updates", + "function_call": null, + "refusal": null, + "role": 
"assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " on", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " the", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " weather", "function_call": null, "refusal": null, "role": "assistant", @@ -2733,7 +3799,85 @@ "choices": [ { "delta": { - "content": " mind", + "content": " Tokyo", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": ".\n\n", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": "Please", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " note", "function_call": null, "refusal": null, "role": "assistant", @@ -2778,6 +3922,578 @@ "usage": null } }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " my", + "function_call": null, + "refusal": null, + "role": 
"assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " knowledge", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " cutoff", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " is", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " December", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " ", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": "202", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + 
"content": "3", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": ",", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " so", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " I", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " may", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " not", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " have", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + 
"id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " the", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " most", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " up", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": "-to", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": "-date", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " information", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " on", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": 
"openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " current", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, { "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": { @@ -2830,526 +4546,6 @@ "usage": null } }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-1adb6f4621ea", - "choices": [ - { - "delta": { - "content": " can", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-1adb6f4621ea", - "choices": [ - { - "delta": { - "content": " change", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-1adb6f4621ea", - "choices": [ - { - "delta": { - "content": " quickly", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-1adb6f4621ea", - "choices": [ - { - "delta": { - "content": ",", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-1adb6f4621ea", - "choices": [ - { - "delta": { - "content": " and", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-1adb6f4621ea", - "choices": [ - { - "delta": { - "content": " it", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null 
- } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-1adb6f4621ea", - "choices": [ - { - "delta": { - "content": "'s", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-1adb6f4621ea", - "choices": [ - { - "delta": { - "content": " always", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-1adb6f4621ea", - "choices": [ - { - "delta": { - "content": " a", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-1adb6f4621ea", - "choices": [ - { - "delta": { - "content": " good", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-1adb6f4621ea", - "choices": [ - { - "delta": { - "content": " idea", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-1adb6f4621ea", - "choices": [ - { - "delta": { - "content": " to", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-1adb6f4621ea", - "choices": [ - { - "delta": { - "content": " check", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": 
null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-1adb6f4621ea", - "choices": [ - { - "delta": { - "content": " the", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-1adb6f4621ea", - "choices": [ - { - "delta": { - "content": " latest", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-1adb6f4621ea", - "choices": [ - { - "delta": { - "content": " forecast", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-1adb6f4621ea", - "choices": [ - { - "delta": { - "content": " before", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-1adb6f4621ea", - "choices": [ - { - "delta": { - "content": " planning", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-1adb6f4621ea", - "choices": [ - { - "delta": { - "content": " your", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-1adb6f4621ea", - "choices": [ - { - "delta": { - "content": " 
activities", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, { "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": { @@ -3413,9 +4609,9 @@ "service_tier": null, "system_fingerprint": "fp_ollama", "usage": { - "completion_tokens": 131, + "completion_tokens": 176, "prompt_tokens": 32, - "total_tokens": 163, + "total_tokens": 208, "completion_tokens_details": null, "prompt_tokens_details": null } diff --git a/tests/integration/agents/recordings/1f0aef7475448c77021b4e321125b4df3aadc1637a93358a85c5ec2de8338332.json b/tests/integration/agents/recordings/1f0aef7475448c77021b4e321125b4df3aadc1637a93358a85c5ec2de8338332.json new file mode 100644 index 000000000..4c0fa6cce --- /dev/null +++ b/tests/integration/agents/recordings/1f0aef7475448c77021b4e321125b4df3aadc1637a93358a85c5ec2de8338332.json @@ -0,0 +1,442 @@ +{ + "test_id": "tests/integration/agents/test_agents.py::test_create_turn_response[ollama/llama3.2:3b-instruct-fp16-client_tools1]", + "request": { + "method": "POST", + "url": "http://0.0.0.0:11434/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "llama3.2:3b-instruct-fp16", + "messages": [ + { + "role": "system", + "content": "You are a helpful assistant" + }, + { + "role": "user", + "content": "Call get_boiling_point_with_metadata tool and answer What is the boiling point of polyjuice?" + }, + { + "role": "assistant", + "content": "", + "tool_calls": [ + { + "id": "call_klhbln13", + "type": "function", + "function": { + "name": "get_boiling_point_with_metadata", + "arguments": "{\"celcius\":false,\"liquid_name\":\"polyjuice\"}" + } + } + ] + }, + { + "role": "tool", + "tool_call_id": "call_klhbln13", + "content": "-212" + } + ], + "max_tokens": 512, + "stream": true, + "temperature": 0.0001, + "tool_choice": "auto", + "tools": [ + { + "type": "function", + "function": { + "name": "get_boiling_point_with_metadata", + "description": "Returns the boiling point of a liquid in Celcius or Fahrenheit", + "parameters": { + "type": "object", + "properties": { + "liquid_name": { + "type": "string", + "description": "The name of the liquid" + }, + "celcius": { + "type": "boolean", + "description": "Whether to return the boiling point in Celcius" + } + }, + "required": [ + "liquid_name" + ] + } + } + } + ], + "top_p": 0.9 + }, + "endpoint": "/v1/chat/completions", + "model": "llama3.2:3b-instruct-fp16" + }, + "response": { + "body": [ + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1f0aef747544", + "choices": [ + { + "delta": { + "content": "The", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1f0aef747544", + "choices": [ + { + "delta": { + "content": " boiling", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": 
null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1f0aef747544", + "choices": [ + { + "delta": { + "content": " point", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1f0aef747544", + "choices": [ + { + "delta": { + "content": " of", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1f0aef747544", + "choices": [ + { + "delta": { + "content": " poly", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1f0aef747544", + "choices": [ + { + "delta": { + "content": "ju", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1f0aef747544", + "choices": [ + { + "delta": { + "content": "ice", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1f0aef747544", + "choices": [ + { + "delta": { + "content": " is", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1f0aef747544", + "choices": [ + { + "delta": { + "content": " -", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": 
null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1f0aef747544", + "choices": [ + { + "delta": { + "content": "212", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1f0aef747544", + "choices": [ + { + "delta": { + "content": " degrees", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1f0aef747544", + "choices": [ + { + "delta": { + "content": " Celsius", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1f0aef747544", + "choices": [ + { + "delta": { + "content": ".", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1f0aef747544", + "choices": [ + { + "delta": { + "content": "", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": "stop", + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + } + ], + "is_streaming": true + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/agents/recordings/2172059863d4d17e7525483102a6753617b60a8c33ece637db18061d23086536.json b/tests/integration/agents/recordings/2172059863d4d17e7525483102a6753617b60a8c33ece637db18061d23086536.json index 992648658..9f9397057 100644 --- a/tests/integration/agents/recordings/2172059863d4d17e7525483102a6753617b60a8c33ece637db18061d23086536.json +++ b/tests/integration/agents/recordings/2172059863d4d17e7525483102a6753617b60a8c33ece637db18061d23086536.json @@ -56,7 +56,7 @@ "tool_calls": [ { "index": 0, - "id": "call_os3xa9go", + "id": "call_6nqo069h", "function": { "arguments": "{\"city\":\"Tokyo\"}", "name": 
"get_weather" @@ -115,9 +115,9 @@ "service_tier": null, "system_fingerprint": "fp_ollama", "usage": { - "completion_tokens": 15, + "completion_tokens": 18, "prompt_tokens": 179, - "total_tokens": 194, + "total_tokens": 197, "completion_tokens_details": null, "prompt_tokens_details": null } diff --git a/tests/integration/agents/recordings/256d8571909664fc6c925058b2ff1b1d0e0bd618975cbf4752eb31ada6d2482b.json b/tests/integration/agents/recordings/256d8571909664fc6c925058b2ff1b1d0e0bd618975cbf4752eb31ada6d2482b.json new file mode 100644 index 000000000..21d5a0663 --- /dev/null +++ b/tests/integration/agents/recordings/256d8571909664fc6c925058b2ff1b1d0e0bd618975cbf4752eb31ada6d2482b.json @@ -0,0 +1,416 @@ +{ + "test_id": "tests/integration/agents/test_agents.py::test_custom_tool_infinite_loop[ollama/llama3.2:3b-instruct-fp16]", + "request": { + "method": "POST", + "url": "http://0.0.0.0:11434/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "llama3.2:3b-instruct-fp16", + "messages": [ + { + "role": "system", + "content": "You are a helpful assistant Always respond with tool calls no matter what. " + }, + { + "role": "user", + "content": "Get the boiling point of polyjuice with a tool call." + }, + { + "role": "assistant", + "content": "", + "tool_calls": [ + { + "id": "call_9x4z21g1", + "type": "function", + "function": { + "name": "get_boiling_point", + "arguments": "{\"celcius\":\"true\",\"liquid_name\":\"polyjuice\"}" + } + } + ] + }, + { + "role": "tool", + "tool_call_id": "call_9x4z21g1", + "content": "-100" + } + ], + "max_tokens": 512, + "stream": true, + "temperature": 0.0001, + "tool_choice": "auto", + "tools": [ + { + "type": "function", + "function": { + "name": "get_boiling_point", + "description": "Returns the boiling point of a liquid in Celcius or Fahrenheit.", + "parameters": { + "type": "object", + "properties": { + "liquid_name": { + "type": "string", + "description": "The name of the liquid" + }, + "celcius": { + "type": "boolean", + "description": "Whether to return the boiling point in Celcius" + } + }, + "required": [ + "liquid_name" + ] + } + } + } + ], + "top_p": 0.9 + }, + "endpoint": "/v1/chat/completions", + "model": "llama3.2:3b-instruct-fp16" + }, + "response": { + "body": [ + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-256d85719096", + "choices": [ + { + "delta": { + "content": "The", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-256d85719096", + "choices": [ + { + "delta": { + "content": " boiling", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-256d85719096", + "choices": [ + { + "delta": { + "content": " point", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": 
null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-256d85719096", + "choices": [ + { + "delta": { + "content": " of", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-256d85719096", + "choices": [ + { + "delta": { + "content": " Poly", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-256d85719096", + "choices": [ + { + "delta": { + "content": "ju", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-256d85719096", + "choices": [ + { + "delta": { + "content": "ice", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-256d85719096", + "choices": [ + { + "delta": { + "content": " is", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-256d85719096", + "choices": [ + { + "delta": { + "content": " -", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-256d85719096", + "choices": [ + { + "delta": { + "content": "100", + "function_call": null, 
+ "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-256d85719096", + "choices": [ + { + "delta": { + "content": "\u00b0C", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-256d85719096", + "choices": [ + { + "delta": { + "content": ".", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-256d85719096", + "choices": [ + { + "delta": { + "content": "", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": "stop", + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + } + ], + "is_streaming": true + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/agents/recordings/292308724331c7172aaf91fe1373f2fbc9d626af08314bd7f5ba69d038ea7c1b.json b/tests/integration/agents/recordings/292308724331c7172aaf91fe1373f2fbc9d626af08314bd7f5ba69d038ea7c1b.json index a94c52c72..9a1781046 100644 --- a/tests/integration/agents/recordings/292308724331c7172aaf91fe1373f2fbc9d626af08314bd7f5ba69d038ea7c1b.json +++ b/tests/integration/agents/recordings/292308724331c7172aaf91fe1373f2fbc9d626af08314bd7f5ba69d038ea7c1b.json @@ -54,5 +54,6 @@ } }, "is_streaming": false - } + }, + "id_normalization_mapping": {} } diff --git a/tests/integration/agents/recordings/36e22908b34c0835037ba7b52477c5db69585e66f4fde18eaa8bfd4bb4e3d783.json b/tests/integration/agents/recordings/36e22908b34c0835037ba7b52477c5db69585e66f4fde18eaa8bfd4bb4e3d783.json index 3699fbc8b..3a1f57ee8 100644 --- a/tests/integration/agents/recordings/36e22908b34c0835037ba7b52477c5db69585e66f4fde18eaa8bfd4bb4e3d783.json +++ b/tests/integration/agents/recordings/36e22908b34c0835037ba7b52477c5db69585e66f4fde18eaa8bfd4bb4e3d783.json @@ -66,7 +66,7 @@ "tool_calls": [ { "index": 0, - "id": "call_ixvkq8fh", + "id": "call_icfpgg5q", "function": { "arguments": "{\"celcius\":true,\"liquid_name\":\"polyjuice\"}", "name": "get_boiling_point" @@ -116,5 +116,6 @@ } ], "is_streaming": true - } + }, + "id_normalization_mapping": {} } diff --git a/tests/integration/agents/recordings/45175e711385e24f62516c3982eaf8fb6bbab4f37691fadc08812ce223dfc628.json b/tests/integration/agents/recordings/45175e711385e24f62516c3982eaf8fb6bbab4f37691fadc08812ce223dfc628.json index 4f001f5bf..0a27ddb7d 100644 --- 
a/tests/integration/agents/recordings/45175e711385e24f62516c3982eaf8fb6bbab4f37691fadc08812ce223dfc628.json +++ b/tests/integration/agents/recordings/45175e711385e24f62516c3982eaf8fb6bbab4f37691fadc08812ce223dfc628.json @@ -54,5 +54,6 @@ } }, "is_streaming": false - } + }, + "id_normalization_mapping": {} } diff --git a/tests/integration/agents/recordings/45d0aabc502385b4cc23e16706a1c594644b2a1387ac3cee7cd434df25e8f22f.json b/tests/integration/agents/recordings/45d0aabc502385b4cc23e16706a1c594644b2a1387ac3cee7cd434df25e8f22f.json new file mode 100644 index 000000000..bfbbcb87b --- /dev/null +++ b/tests/integration/agents/recordings/45d0aabc502385b4cc23e16706a1c594644b2a1387ac3cee7cd434df25e8f22f.json @@ -0,0 +1,442 @@ +{ + "test_id": "tests/integration/agents/test_agents.py::test_custom_tool[ollama/llama3.2:3b-instruct-fp16]", + "request": { + "method": "POST", + "url": "http://0.0.0.0:11434/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "llama3.2:3b-instruct-fp16", + "messages": [ + { + "role": "system", + "content": "You are a helpful assistant" + }, + { + "role": "user", + "content": "What is the boiling point of the liquid polyjuice in celsius?" + }, + { + "role": "assistant", + "content": "", + "tool_calls": [ + { + "id": "call_icfpgg5q", + "type": "function", + "function": { + "name": "get_boiling_point", + "arguments": "{\"celcius\":true,\"liquid_name\":\"polyjuice\"}" + } + } + ] + }, + { + "role": "tool", + "tool_call_id": "call_icfpgg5q", + "content": "-100" + } + ], + "max_tokens": 512, + "stream": true, + "temperature": 0.0001, + "tool_choice": "auto", + "tools": [ + { + "type": "function", + "function": { + "name": "get_boiling_point", + "description": "Returns the boiling point of a liquid in Celcius or Fahrenheit.", + "parameters": { + "type": "object", + "properties": { + "liquid_name": { + "type": "string", + "description": "The name of the liquid" + }, + "celcius": { + "type": "boolean", + "description": "Whether to return the boiling point in Celcius" + } + }, + "required": [ + "liquid_name" + ] + } + } + } + ], + "top_p": 0.9 + }, + "endpoint": "/v1/chat/completions", + "model": "llama3.2:3b-instruct-fp16" + }, + "response": { + "body": [ + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-45d0aabc5023", + "choices": [ + { + "delta": { + "content": "The", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-45d0aabc5023", + "choices": [ + { + "delta": { + "content": " boiling", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-45d0aabc5023", + "choices": [ + { + "delta": { + "content": " point", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + 
"index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-45d0aabc5023", + "choices": [ + { + "delta": { + "content": " of", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-45d0aabc5023", + "choices": [ + { + "delta": { + "content": " liquid", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-45d0aabc5023", + "choices": [ + { + "delta": { + "content": " poly", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-45d0aabc5023", + "choices": [ + { + "delta": { + "content": "ju", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-45d0aabc5023", + "choices": [ + { + "delta": { + "content": "ice", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-45d0aabc5023", + "choices": [ + { + "delta": { + "content": " is", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-45d0aabc5023", + "choices": [ + { + "delta": { + "content": " -", + "function_call": null, + "refusal": null, + "role": 
"assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-45d0aabc5023", + "choices": [ + { + "delta": { + "content": "100", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-45d0aabc5023", + "choices": [ + { + "delta": { + "content": "\u00b0C", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-45d0aabc5023", + "choices": [ + { + "delta": { + "content": ".", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-45d0aabc5023", + "choices": [ + { + "delta": { + "content": "", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": "stop", + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + } + ], + "is_streaming": true + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/agents/recordings/4da32cdf48ae4c3b381e3557edb99afa0ff16a609aaa941737e99606961a6a07.json b/tests/integration/agents/recordings/4da32cdf48ae4c3b381e3557edb99afa0ff16a609aaa941737e99606961a6a07.json index 89fa490c3..755276918 100644 --- a/tests/integration/agents/recordings/4da32cdf48ae4c3b381e3557edb99afa0ff16a609aaa941737e99606961a6a07.json +++ b/tests/integration/agents/recordings/4da32cdf48ae4c3b381e3557edb99afa0ff16a609aaa941737e99606961a6a07.json @@ -45,7 +45,33 @@ "choices": [ { "delta": { - "content": "The", + "content": "Italy", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-4da32cdf48ae", + "choices": [ + { + "delta": { + "content": "'s", "function_call": null, "refusal": null, "role": 
"assistant", @@ -90,58 +116,6 @@ "usage": null } }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-4da32cdf48ae", - "choices": [ - { - "delta": { - "content": " of", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-4da32cdf48ae", - "choices": [ - { - "delta": { - "content": " Italy", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, { "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": { @@ -194,6 +168,1124 @@ "usage": null } }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-4da32cdf48ae", + "choices": [ + { + "delta": { + "content": " and", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-4da32cdf48ae", + "choices": [ + { + "delta": { + "content": " also", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-4da32cdf48ae", + "choices": [ + { + "delta": { + "content": " the", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-4da32cdf48ae", + "choices": [ + { + "delta": { + "content": " seat", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-4da32cdf48ae", + "choices": [ + { + "delta": { + "content": " for", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": 
null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-4da32cdf48ae", + "choices": [ + { + "delta": { + "content": " the", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-4da32cdf48ae", + "choices": [ + { + "delta": { + "content": " EU", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-4da32cdf48ae", + "choices": [ + { + "delta": { + "content": ",", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-4da32cdf48ae", + "choices": [ + { + "delta": { + "content": " as", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-4da32cdf48ae", + "choices": [ + { + "delta": { + "content": " well", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-4da32cdf48ae", + "choices": [ + { + "delta": { + "content": " it", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-4da32cdf48ae", + "choices": [ + { + "delta": { + "content": " has", + "function_call": 
null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-4da32cdf48ae", + "choices": [ + { + "delta": { + "content": " been", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-4da32cdf48ae", + "choices": [ + { + "delta": { + "content": " for", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-4da32cdf48ae", + "choices": [ + { + "delta": { + "content": " the", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-4da32cdf48ae", + "choices": [ + { + "delta": { + "content": " centuries", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-4da32cdf48ae", + "choices": [ + { + "delta": { + "content": " a", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-4da32cdf48ae", + "choices": [ + { + "delta": { + "content": " significant", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": 
"rec-4da32cdf48ae", + "choices": [ + { + "delta": { + "content": " role", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-4da32cdf48ae", + "choices": [ + { + "delta": { + "content": " in", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-4da32cdf48ae", + "choices": [ + { + "delta": { + "content": " international", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-4da32cdf48ae", + "choices": [ + { + "delta": { + "content": " politics", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-4da32cdf48ae", + "choices": [ + { + "delta": { + "content": " with", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-4da32cdf48ae", + "choices": [ + { + "delta": { + "content": " that", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-4da32cdf48ae", + "choices": [ + { + "delta": { + "content": " being", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": 
"openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-4da32cdf48ae", + "choices": [ + { + "delta": { + "content": " also", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-4da32cdf48ae", + "choices": [ + { + "delta": { + "content": " an", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-4da32cdf48ae", + "choices": [ + { + "delta": { + "content": " important", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-4da32cdf48ae", + "choices": [ + { + "delta": { + "content": " location", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-4da32cdf48ae", + "choices": [ + { + "delta": { + "content": " for", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-4da32cdf48ae", + "choices": [ + { + "delta": { + "content": " various", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-4da32cdf48ae", + "choices": [ + { + "delta": { + "content": " historical", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": 
null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-4da32cdf48ae", + "choices": [ + { + "delta": { + "content": " events", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-4da32cdf48ae", + "choices": [ + { + "delta": { + "content": " such", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-4da32cdf48ae", + "choices": [ + { + "delta": { + "content": " like", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-4da32cdf48ae", + "choices": [ + { + "delta": { + "content": " the", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-4da32cdf48ae", + "choices": [ + { + "delta": { + "content": " signing", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-4da32cdf48ae", + "choices": [ + { + "delta": { + "content": " of", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-4da32cdf48ae", + "choices": [ + { + "delta": { + "content": " treaty", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": 
"llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-4da32cdf48ae", + "choices": [ + { + "delta": { + "content": " of", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-4da32cdf48ae", + "choices": [ + { + "delta": { + "content": " West", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-4da32cdf48ae", + "choices": [ + { + "delta": { + "content": "ph", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-4da32cdf48ae", + "choices": [ + { + "delta": { + "content": "alia", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, { "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": { @@ -257,9 +1349,9 @@ "service_tier": null, "system_fingerprint": "fp_ollama", "usage": { - "completion_tokens": 8, + "completion_tokens": 50, "prompt_tokens": 82, - "total_tokens": 90, + "total_tokens": 132, "completion_tokens_details": null, "prompt_tokens_details": null } diff --git a/tests/integration/agents/recordings/585a2cf2c22b0db155a6a94052836b34c42f68bf04b1b2cb74ddf00943c0442d.json b/tests/integration/agents/recordings/585a2cf2c22b0db155a6a94052836b34c42f68bf04b1b2cb74ddf00943c0442d.json index cac9a6db2..988b270d7 100644 --- a/tests/integration/agents/recordings/585a2cf2c22b0db155a6a94052836b34c42f68bf04b1b2cb74ddf00943c0442d.json +++ b/tests/integration/agents/recordings/585a2cf2c22b0db155a6a94052836b34c42f68bf04b1b2cb74ddf00943c0442d.json @@ -48,7 +48,7 @@ "tool_calls": [ { "index": 0, - "id": "call_lqrdy0rt", + "id": "call_x427af31", "function": { "arguments": "{}", "name": "get_current_time" @@ -107,9 +107,9 @@ "service_tier": null, "system_fingerprint": "fp_ollama", "usage": { - "completion_tokens": 14, + "completion_tokens": 12, "prompt_tokens": 161, - "total_tokens": 175, + "total_tokens": 173, "completion_tokens_details": null, "prompt_tokens_details": null } diff --git 
a/tests/integration/agents/recordings/5edf2f0b7a9c875e80e4719f71a1daa94c1287acf164cd81ddd51843d05be718.json b/tests/integration/agents/recordings/5edf2f0b7a9c875e80e4719f71a1daa94c1287acf164cd81ddd51843d05be718.json index 49ca098d5..009646e27 100644 --- a/tests/integration/agents/recordings/5edf2f0b7a9c875e80e4719f71a1daa94c1287acf164cd81ddd51843d05be718.json +++ b/tests/integration/agents/recordings/5edf2f0b7a9c875e80e4719f71a1daa94c1287acf164cd81ddd51843d05be718.json @@ -56,7 +56,7 @@ "tool_calls": [ { "index": 0, - "id": "call_4ibtjudr", + "id": "call_wkjhgmpf", "function": { "arguments": "{\"city\":\"Tokyo\"}", "name": "get_weather" diff --git a/tests/integration/agents/recordings/697a25dd7f0ff515f567c883ad72ae9dca423726834aec8b38420dccb735c050.json b/tests/integration/agents/recordings/697a25dd7f0ff515f567c883ad72ae9dca423726834aec8b38420dccb735c050.json index 298e0e3b8..8b8f04ae6 100644 --- a/tests/integration/agents/recordings/697a25dd7f0ff515f567c883ad72ae9dca423726834aec8b38420dccb735c050.json +++ b/tests/integration/agents/recordings/697a25dd7f0ff515f567c883ad72ae9dca423726834aec8b38420dccb735c050.json @@ -66,7 +66,7 @@ "tool_calls": [ { "index": 0, - "id": "call_pojpzwm8", + "id": "call_klhbln13", "function": { "arguments": "{\"celcius\":false,\"liquid_name\":\"polyjuice\"}", "name": "get_boiling_point_with_metadata" @@ -116,5 +116,6 @@ } ], "is_streaming": true - } + }, + "id_normalization_mapping": {} } diff --git a/tests/integration/agents/recordings/6b207540bc67e2b9e6beb091d477d446d91e9837add7e9f4de236627195d41e4.json b/tests/integration/agents/recordings/6b207540bc67e2b9e6beb091d477d446d91e9837add7e9f4de236627195d41e4.json index fc263d5e9..d5d249587 100644 --- a/tests/integration/agents/recordings/6b207540bc67e2b9e6beb091d477d446d91e9837add7e9f4de236627195d41e4.json +++ b/tests/integration/agents/recordings/6b207540bc67e2b9e6beb091d477d446d91e9837add7e9f4de236627195d41e4.json @@ -81,33 +81,7 @@ "choices": [ { "delta": { - "content": " not", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " able", + "content": " happy", "function_call": null, "refusal": null, "role": "assistant", @@ -159,267 +133,7 @@ "choices": [ { "delta": { - "content": " provide", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " real", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": 
"openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": "-time", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " weather", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " information", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": ".", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " However", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": ",", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " I", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - 
"system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " can", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " give", + "content": " help", "function_call": null, "refusal": null, "role": "assistant", @@ -471,7 +185,7 @@ "choices": [ { "delta": { - "content": " an", + "content": " with", "function_call": null, "refusal": null, "role": "assistant", @@ -497,7 +211,7 @@ "choices": [ { "delta": { - "content": " idea", + "content": " your", "function_call": null, "refusal": null, "role": "assistant", @@ -523,553 +237,7 @@ "choices": [ { "delta": { - "content": " of", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " what", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " Tokyo", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": "'s", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " typical", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": 
null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " weather", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " is", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " like", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " during", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " different", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " seasons", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": ".\n\n", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": 
"llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": "**", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": "Spring", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " (", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": "March", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " to", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " May", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": ")**", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - 
"index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": ":", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " Mild", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " temperatures", + "content": " question", "function_call": null, "refusal": null, "role": "assistant", @@ -1121,7 +289,7 @@ "choices": [ { "delta": { - "content": " usually", + "content": " but", "function_call": null, "refusal": null, "role": "assistant", @@ -1147,7 +315,85 @@ "choices": [ { "delta": { - "content": " ranging", + "content": " I", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " need", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " more", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " information", "function_call": null, "refusal": null, "role": "assistant", @@ -1199,7 +445,7 @@ "choices": [ { "delta": { - "content": " ", + "content": " you", "function_call": null, "refusal": null, "role": "assistant", @@ -1225,7 
+471,7 @@ "choices": [ { "delta": { - "content": "10", + "content": ".", "function_call": null, "refusal": null, "role": "assistant", @@ -1251,3725 +497,7 @@ "choices": [ { "delta": { - "content": "\u00b0C", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " to", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " ", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": "20", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": "\u00b0C", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " (", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": "50", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": 
"chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": "\u00b0F", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " to", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " ", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": "68", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": "\u00b0F", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": ").", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " It", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - 
"created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": "'s", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " a", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " great", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " time", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " to", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " visit", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " Tokyo", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - 
"finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " for", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " cherry", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " blossom", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " season", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": ".\n\n", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": "**", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": "Summer", - "function_call": 
null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " (", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": "June", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " to", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " August", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": ")**", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": ":", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - 
"choices": [ - { - "delta": { - "content": " Hot", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " and", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " humid", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": ",", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " with", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " temperatures", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " often", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": 
"openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " exceeding", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " ", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": "30", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": "\u00b0C", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " (", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": "86", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": "\u00b0F", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - 
"system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": ").", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " Summer", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " is", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " rainy", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": ",", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " with", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " heavy", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": 
"llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " down", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": "p", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": "ours", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " during", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " the", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " after", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": "no", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - 
"index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": "ons", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": ".\n\n", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": "**", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": "Aut", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": "umn", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " (", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": "September", - "function_call": null, - "refusal": null, - "role": 
"assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " to", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " November", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": ")**", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": ":", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " Comfort", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": "able", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - 
"content": " temperatures", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": ",", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " ranging", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " from", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " ", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": "10", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": "\u00b0C", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - 
"__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " to", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " ", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": "20", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": "\u00b0C", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " (", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": "50", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": "\u00b0F", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": 
"openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " to", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " ", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": "68", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": "\u00b0F", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": ").", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " Autumn", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " foliage", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - 
"system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " is", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " a", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " highlight", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " of", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " Tokyo", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": "'s", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " scenery", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": 
"llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": ".\n\n", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": "**", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": "Winter", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " (", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": "December", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " to", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " February", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": 
null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": ")**", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": ":", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " Cold", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " and", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " snowy", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": ",", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " with", - "function_call": null, - "refusal": null, - 
"role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " temperatures", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " sometimes", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " dropping", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " below", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " ", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": "0", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { 
- "delta": { - "content": "\u00b0C", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " (", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": "32", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": "\u00b0F", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": ").", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " Snow", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": "fall", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": 
"openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " can", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " be", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " significant", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " in", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " some", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " parts", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " of", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - 
"system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " the", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " city", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": ".\n\n", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": "Please", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " note", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " that", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " these", + "content": " There", "function_call": null, "refusal": null, "role": "assistant", @@ -5021,7 +549,7 @@ "choices": [ { "delta": { - "content": " general", + "content": " many", "function_call": null, 
"refusal": null, "role": "assistant", @@ -5047,7 +575,7 @@ "choices": [ { "delta": { - "content": " temperature", + "content": " cities", "function_call": null, "refusal": null, "role": "assistant", @@ -5073,7 +601,33 @@ "choices": [ { "delta": { - "content": " ranges", + "content": " named", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " Tokyo", "function_call": null, "refusal": null, "role": "assistant", @@ -5151,7 +705,7 @@ "choices": [ { "delta": { - "content": " actual", + "content": " each", "function_call": null, "refusal": null, "role": "assistant", @@ -5177,7 +731,7 @@ "choices": [ { "delta": { - "content": " weather", + "content": " one", "function_call": null, "refusal": null, "role": "assistant", @@ -5203,7 +757,7 @@ "choices": [ { "delta": { - "content": " conditions", + "content": " has", "function_call": null, "refusal": null, "role": "assistant", @@ -5229,7 +783,7 @@ "choices": [ { "delta": { - "content": " may", + "content": " a", "function_call": null, "refusal": null, "role": "assistant", @@ -5255,7 +809,7 @@ "choices": [ { "delta": { - "content": " vary", + "content": " different", "function_call": null, "refusal": null, "role": "assistant", @@ -5281,7 +835,7 @@ "choices": [ { "delta": { - "content": " from", + "content": " climate", "function_call": null, "refusal": null, "role": "assistant", @@ -5307,7 +861,319 @@ "choices": [ { "delta": { - "content": " year", + "content": ".\n\n", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": "Could", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " you", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " please", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + 
"finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " tell", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " me", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " which", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " city", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " in", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " Japan", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " you", + "function_call": null, + 
"refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " are", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " referring", "function_call": null, "refusal": null, "role": "assistant", @@ -5359,7 +1225,917 @@ "choices": [ { "delta": { - "content": " year", + "content": "?", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " Tokyo", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " itself", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " is", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " not", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + 
"usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " always", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " a", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " good", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " choice", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " as", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " it", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " often", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": 
"chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " gets", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " confused", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " with", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " the", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " actual", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " name", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " for", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + 
} + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " the", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " large", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " and", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " populous", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " area", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " of", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " the", + "function_call": null, + "refusal": null, + "role": "assistant", + 
"tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " K", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": "anto", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " region", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " which", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " includes", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " the", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " 
larger", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " areas", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " surrounding", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " Tokyo", "function_call": null, "refusal": null, "role": "assistant", @@ -5404,6 +2180,708 @@ "usage": null } }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " \n\n", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": "If", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " that", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " does", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": 
null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " not", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " give", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " us", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " enough", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " grounds", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " then", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " could", + "function_call": null, + "refusal": null, + "role": "assistant", + 
"tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " you", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " provide", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " also", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " what", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " approximate", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " month", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + 
"content": " you", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " want", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " the", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " information", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " about", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " (", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": "for", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + 
"__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " seasonal", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " changes", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": ")?", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, { "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": { @@ -5441,9 +2919,9 @@ "service_tier": null, "system_fingerprint": "fp_ollama", "usage": { - "completion_tokens": 208, + "completion_tokens": 111, "prompt_tokens": 32, - "total_tokens": 240, + "total_tokens": 143, "completion_tokens_details": null, "prompt_tokens_details": null } diff --git a/tests/integration/agents/recordings/6da760645fe224ace4ab628e4f647259897598e28037fe5f7c09f6677edd08e9.json b/tests/integration/agents/recordings/6da760645fe224ace4ab628e4f647259897598e28037fe5f7c09f6677edd08e9.json index 41c4f97ae..a178476e1 100644 --- a/tests/integration/agents/recordings/6da760645fe224ace4ab628e4f647259897598e28037fe5f7c09f6677edd08e9.json +++ b/tests/integration/agents/recordings/6da760645fe224ace4ab628e4f647259897598e28037fe5f7c09f6677edd08e9.json @@ -54,5 +54,6 @@ } }, "is_streaming": false - } + }, + "id_normalization_mapping": {} } diff --git a/tests/integration/agents/recordings/7094319e038424fbec54338c397b487c7128fc28534351deb4662fba31043fa4.json b/tests/integration/agents/recordings/7094319e038424fbec54338c397b487c7128fc28534351deb4662fba31043fa4.json index dce0c2e4d..7f7bf13ca 100644 --- a/tests/integration/agents/recordings/7094319e038424fbec54338c397b487c7128fc28534351deb4662fba31043fa4.json +++ b/tests/integration/agents/recordings/7094319e038424fbec54338c397b487c7128fc28534351deb4662fba31043fa4.json @@ -54,5 +54,6 @@ } }, "is_streaming": false - } + }, + "id_normalization_mapping": {} } diff --git a/tests/integration/agents/recordings/74c26f63592ceedb76eb9623fce41773965dd66b569506b5622b1a797c45f8e4.json b/tests/integration/agents/recordings/74c26f63592ceedb76eb9623fce41773965dd66b569506b5622b1a797c45f8e4.json index d8b125dad..a1464e8c3 100644 --- a/tests/integration/agents/recordings/74c26f63592ceedb76eb9623fce41773965dd66b569506b5622b1a797c45f8e4.json +++ b/tests/integration/agents/recordings/74c26f63592ceedb76eb9623fce41773965dd66b569506b5622b1a797c45f8e4.json @@ -54,5 +54,6 @@ } }, "is_streaming": false - } + }, + 
"id_normalization_mapping": {} } diff --git a/tests/integration/agents/recordings/7e0d8c4abe407611ceaa99beea2e9973e2a938cab3db3e1be017bbe8be10edc6.json b/tests/integration/agents/recordings/7e0d8c4abe407611ceaa99beea2e9973e2a938cab3db3e1be017bbe8be10edc6.json index e11d38095..665e53245 100644 --- a/tests/integration/agents/recordings/7e0d8c4abe407611ceaa99beea2e9973e2a938cab3db3e1be017bbe8be10edc6.json +++ b/tests/integration/agents/recordings/7e0d8c4abe407611ceaa99beea2e9973e2a938cab3db3e1be017bbe8be10edc6.json @@ -66,7 +66,7 @@ "tool_calls": [ { "index": 0, - "id": "call_rwasjr3y", + "id": "call_zqu5i0ti", "function": { "arguments": "{\"celcius\":null,\"liquid_name\":\"polyjuice\"}", "name": "get_boiling_point" @@ -116,5 +116,6 @@ } ], "is_streaming": true - } + }, + "id_normalization_mapping": {} } diff --git a/tests/integration/agents/recordings/7e794c73bf79604a10482bba03124849cb763c7bb66acf3937b524a539b80366.json b/tests/integration/agents/recordings/7e794c73bf79604a10482bba03124849cb763c7bb66acf3937b524a539b80366.json new file mode 100644 index 000000000..dfae71291 --- /dev/null +++ b/tests/integration/agents/recordings/7e794c73bf79604a10482bba03124849cb763c7bb66acf3937b524a539b80366.json @@ -0,0 +1,59 @@ +{ + "test_id": "tests/integration/agents/test_agents.py::test_tool_choice_required[ollama/llama3.2:3b-instruct-fp16]", + "request": { + "method": "POST", + "url": "http://0.0.0.0:11434/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "llama-guard3:1b", + "messages": [ + { + "role": "user", + "content": "Task: Check if there is unsafe content in 'Assistant' messages in conversations according our safety policy with the below categories.\n\n\nS1: Violent Crimes.\nS2: Non-Violent Crimes.\nS3: Sex Crimes.\nS4: Child Exploitation.\nS5: Defamation.\nS6: Specialized Advice.\nS7: Privacy.\nS8: Intellectual Property.\nS9: Indiscriminate Weapons.\nS10: Hate.\nS11: Self-Harm.\nS12: Sexual Content.\nS13: Elections.\nS14: Code Interpreter Abuse.\n\n\n\n\nUser: What is the boiling point of the liquid polyjuice in celsius?\n\nAssistant: \n\nTool: -100\n\nAssistant: The boiling point of liquid polyjuice is -100\u00b0C.\n\n\n\nProvide your safety assessment for ONLY THE LAST Assistant message in the above conversation:\n - First line must read 'safe' or 'unsafe'.\n - If unsafe, a second line must include a comma-separated list of violated categories." 
+ } + ], + "stream": false, + "temperature": 0.0 + }, + "endpoint": "/v1/chat/completions", + "model": "llama-guard3:1b" + }, + "response": { + "body": { + "__type__": "openai.types.chat.chat_completion.ChatCompletion", + "__data__": { + "id": "rec-7e794c73bf79", + "choices": [ + { + "finish_reason": "stop", + "index": 0, + "logprobs": null, + "message": { + "content": "safe", + "refusal": null, + "role": "assistant", + "annotations": null, + "audio": null, + "function_call": null, + "tool_calls": null + } + } + ], + "created": 0, + "model": "llama-guard3:1b", + "object": "chat.completion", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": { + "completion_tokens": 2, + "prompt_tokens": 421, + "total_tokens": 423, + "completion_tokens_details": null, + "prompt_tokens_details": null + } + } + }, + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/agents/recordings/868820c8d798c0d16063d1750a65ae2632ecf543ee440e7d87ea16f8e83461a5.json b/tests/integration/agents/recordings/868820c8d798c0d16063d1750a65ae2632ecf543ee440e7d87ea16f8e83461a5.json index c82ea6394..fa03baf5e 100644 --- a/tests/integration/agents/recordings/868820c8d798c0d16063d1750a65ae2632ecf543ee440e7d87ea16f8e83461a5.json +++ b/tests/integration/agents/recordings/868820c8d798c0d16063d1750a65ae2632ecf543ee440e7d87ea16f8e83461a5.json @@ -54,5 +54,6 @@ } }, "is_streaming": false - } + }, + "id_normalization_mapping": {} } diff --git a/tests/integration/agents/recordings/86e2b939aabb9dfe7ec712a6b20a5809d6fb56f8c9f92d93030f57cba51a1fe2.json b/tests/integration/agents/recordings/86e2b939aabb9dfe7ec712a6b20a5809d6fb56f8c9f92d93030f57cba51a1fe2.json index c33ecca7e..c702a53aa 100644 --- a/tests/integration/agents/recordings/86e2b939aabb9dfe7ec712a6b20a5809d6fb56f8c9f92d93030f57cba51a1fe2.json +++ b/tests/integration/agents/recordings/86e2b939aabb9dfe7ec712a6b20a5809d6fb56f8c9f92d93030f57cba51a1fe2.json @@ -54,5 +54,6 @@ } }, "is_streaming": false - } + }, + "id_normalization_mapping": {} } diff --git a/tests/integration/agents/recordings/8c4ec47152697a5b34e44d75af581efbada34e06f59ddf536149871c64c9a247.json b/tests/integration/agents/recordings/8c4ec47152697a5b34e44d75af581efbada34e06f59ddf536149871c64c9a247.json new file mode 100644 index 000000000..9d391c7c8 --- /dev/null +++ b/tests/integration/agents/recordings/8c4ec47152697a5b34e44d75af581efbada34e06f59ddf536149871c64c9a247.json @@ -0,0 +1,442 @@ +{ + "test_id": "tests/integration/agents/test_agents.py::test_create_turn_response[ollama/llama3.2:3b-instruct-fp16-client_tools0]", + "request": { + "method": "POST", + "url": "http://0.0.0.0:11434/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "llama3.2:3b-instruct-fp16", + "messages": [ + { + "role": "system", + "content": "You are a helpful assistant" + }, + { + "role": "user", + "content": "Call get_boiling_point tool and answer What is the boiling point of polyjuice?" 
+ }, + { + "role": "assistant", + "content": "", + "tool_calls": [ + { + "id": "call_zqu5i0ti", + "type": "function", + "function": { + "name": "get_boiling_point", + "arguments": "{\"celcius\":null,\"liquid_name\":\"polyjuice\"}" + } + } + ] + }, + { + "role": "tool", + "tool_call_id": "call_zqu5i0ti", + "content": "-212" + } + ], + "max_tokens": 512, + "stream": true, + "temperature": 0.0001, + "tool_choice": "auto", + "tools": [ + { + "type": "function", + "function": { + "name": "get_boiling_point", + "description": "Returns the boiling point of a liquid in Celcius or Fahrenheit.", + "parameters": { + "type": "object", + "properties": { + "liquid_name": { + "type": "string", + "description": "The name of the liquid" + }, + "celcius": { + "type": "boolean", + "description": "Whether to return the boiling point in Celcius" + } + }, + "required": [ + "liquid_name" + ] + } + } + } + ], + "top_p": 0.9 + }, + "endpoint": "/v1/chat/completions", + "model": "llama3.2:3b-instruct-fp16" + }, + "response": { + "body": [ + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-8c4ec4715269", + "choices": [ + { + "delta": { + "content": "The", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-8c4ec4715269", + "choices": [ + { + "delta": { + "content": " boiling", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-8c4ec4715269", + "choices": [ + { + "delta": { + "content": " point", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-8c4ec4715269", + "choices": [ + { + "delta": { + "content": " of", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-8c4ec4715269", + "choices": [ + { + "delta": { + "content": " poly", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": 
null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-8c4ec4715269", + "choices": [ + { + "delta": { + "content": "ju", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-8c4ec4715269", + "choices": [ + { + "delta": { + "content": "ice", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-8c4ec4715269", + "choices": [ + { + "delta": { + "content": " is", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-8c4ec4715269", + "choices": [ + { + "delta": { + "content": " -", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-8c4ec4715269", + "choices": [ + { + "delta": { + "content": "212", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-8c4ec4715269", + "choices": [ + { + "delta": { + "content": " degrees", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-8c4ec4715269", + "choices": [ + { + "delta": { + "content": " Celsius", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": 
"llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-8c4ec4715269", + "choices": [ + { + "delta": { + "content": ".", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-8c4ec4715269", + "choices": [ + { + "delta": { + "content": "", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": "stop", + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + } + ], + "is_streaming": true + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/agents/recordings/8ed0947593196c2b2f68035e248c137813e8db50d0d46395ef9ba98636fa5819.json b/tests/integration/agents/recordings/8ed0947593196c2b2f68035e248c137813e8db50d0d46395ef9ba98636fa5819.json index b209de507..15f9b0f96 100644 --- a/tests/integration/agents/recordings/8ed0947593196c2b2f68035e248c137813e8db50d0d46395ef9ba98636fa5819.json +++ b/tests/integration/agents/recordings/8ed0947593196c2b2f68035e248c137813e8db50d0d46395ef9ba98636fa5819.json @@ -54,5 +54,6 @@ } }, "is_streaming": false - } + }, + "id_normalization_mapping": {} } diff --git a/tests/integration/agents/recordings/8fc418c02b8b6fe09238e36fb72c5e7fc9d41fdaa3eb357f518e0fcaec5ea1e2.json b/tests/integration/agents/recordings/8fc418c02b8b6fe09238e36fb72c5e7fc9d41fdaa3eb357f518e0fcaec5ea1e2.json new file mode 100644 index 000000000..70d92b2bf --- /dev/null +++ b/tests/integration/agents/recordings/8fc418c02b8b6fe09238e36fb72c5e7fc9d41fdaa3eb357f518e0fcaec5ea1e2.json @@ -0,0 +1,59 @@ +{ + "test_id": "tests/integration/agents/test_agents.py::test_tool_choice_get_boiling_point[ollama/llama3.2:3b-instruct-fp16]", + "request": { + "method": "POST", + "url": "http://0.0.0.0:11434/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "llama-guard3:1b", + "messages": [ + { + "role": "user", + "content": "Task: Check if there is unsafe content in 'Assistant' messages in conversations according our safety policy with the below categories.\n\n\nS1: Violent Crimes.\nS2: Non-Violent Crimes.\nS3: Sex Crimes.\nS4: Child Exploitation.\nS5: Defamation.\nS6: Specialized Advice.\nS7: Privacy.\nS8: Intellectual Property.\nS9: Indiscriminate Weapons.\nS10: Hate.\nS11: Self-Harm.\nS12: Sexual Content.\nS13: Elections.\nS14: Code Interpreter Abuse.\n\n\n\n\nUser: What is the boiling point of the liquid polyjuice in celsius?\n\nAssistant: \n\nTool: -100\n\nAssistant: The boiling point of liquid polyjuice is -100\u00b0C.\n\n\n\nProvide your safety assessment for ONLY THE LAST Assistant message in the above conversation:\n - First line must read 'safe' or 'unsafe'.\n - If unsafe, a second line must include a comma-separated list of violated categories." 
+ } + ], + "stream": false, + "temperature": 0.0 + }, + "endpoint": "/v1/chat/completions", + "model": "llama-guard3:1b" + }, + "response": { + "body": { + "__type__": "openai.types.chat.chat_completion.ChatCompletion", + "__data__": { + "id": "rec-8fc418c02b8b", + "choices": [ + { + "finish_reason": "stop", + "index": 0, + "logprobs": null, + "message": { + "content": "safe", + "refusal": null, + "role": "assistant", + "annotations": null, + "audio": null, + "function_call": null, + "tool_calls": null + } + } + ], + "created": 0, + "model": "llama-guard3:1b", + "object": "chat.completion", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": { + "completion_tokens": 2, + "prompt_tokens": 421, + "total_tokens": 423, + "completion_tokens_details": null, + "prompt_tokens_details": null + } + } + }, + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/agents/recordings/901956b3a51b792f2506d603489af51636b480db9cc520614ee4886418776237.json b/tests/integration/agents/recordings/901956b3a51b792f2506d603489af51636b480db9cc520614ee4886418776237.json index 07b7f8331..16078a8c2 100644 --- a/tests/integration/agents/recordings/901956b3a51b792f2506d603489af51636b480db9cc520614ee4886418776237.json +++ b/tests/integration/agents/recordings/901956b3a51b792f2506d603489af51636b480db9cc520614ee4886418776237.json @@ -54,5 +54,6 @@ } }, "is_streaming": false - } + }, + "id_normalization_mapping": {} } diff --git a/tests/integration/agents/recordings/958f9b74e98bcf41e4988db8ad15494b8fe0ff707261108305353e4ad980195f.json b/tests/integration/agents/recordings/958f9b74e98bcf41e4988db8ad15494b8fe0ff707261108305353e4ad980195f.json index aeb1fe320..ec3117ee3 100644 --- a/tests/integration/agents/recordings/958f9b74e98bcf41e4988db8ad15494b8fe0ff707261108305353e4ad980195f.json +++ b/tests/integration/agents/recordings/958f9b74e98bcf41e4988db8ad15494b8fe0ff707261108305353e4ad980195f.json @@ -1510,5 +1510,6 @@ } ], "is_streaming": true - } + }, + "id_normalization_mapping": {} } diff --git a/tests/integration/agents/recordings/96623a251d6e51ee6ba21c53ca111d4aa54882a124d783a8096fd88adf481065.json b/tests/integration/agents/recordings/96623a251d6e51ee6ba21c53ca111d4aa54882a124d783a8096fd88adf481065.json index 93155e18c..4d8a2a9ce 100644 --- a/tests/integration/agents/recordings/96623a251d6e51ee6ba21c53ca111d4aa54882a124d783a8096fd88adf481065.json +++ b/tests/integration/agents/recordings/96623a251d6e51ee6ba21c53ca111d4aa54882a124d783a8096fd88adf481065.json @@ -54,5 +54,6 @@ } }, "is_streaming": false - } + }, + "id_normalization_mapping": {} } diff --git a/tests/integration/agents/recordings/a702e4bf918e94acd0d76ed753c120a4704dde82acf5ae198982fd62bd103279.json b/tests/integration/agents/recordings/a702e4bf918e94acd0d76ed753c120a4704dde82acf5ae198982fd62bd103279.json index 1903e3d19..5200b2e65 100644 --- a/tests/integration/agents/recordings/a702e4bf918e94acd0d76ed753c120a4704dde82acf5ae198982fd62bd103279.json +++ b/tests/integration/agents/recordings/a702e4bf918e94acd0d76ed753c120a4704dde82acf5ae198982fd62bd103279.json @@ -54,5 +54,6 @@ } }, "is_streaming": false - } + }, + "id_normalization_mapping": {} } diff --git a/tests/integration/agents/recordings/ad3f6a2b4031bcd38026c3c50617851f102c12946164a563584e6316bd1b6228.json b/tests/integration/agents/recordings/ad3f6a2b4031bcd38026c3c50617851f102c12946164a563584e6316bd1b6228.json index d6ec4ea4b..52d599fe0 100644 --- 
a/tests/integration/agents/recordings/ad3f6a2b4031bcd38026c3c50617851f102c12946164a563584e6316bd1b6228.json +++ b/tests/integration/agents/recordings/ad3f6a2b4031bcd38026c3c50617851f102c12946164a563584e6316bd1b6228.json @@ -54,5 +54,6 @@ } }, "is_streaming": false - } + }, + "id_normalization_mapping": {} } diff --git a/tests/integration/agents/recordings/b3c24a0ab429fb3d7e3680a2a689a8eddb2c2aaf826b513fec55dcd70cdf35ea.json b/tests/integration/agents/recordings/b3c24a0ab429fb3d7e3680a2a689a8eddb2c2aaf826b513fec55dcd70cdf35ea.json new file mode 100644 index 000000000..15a721ef9 --- /dev/null +++ b/tests/integration/agents/recordings/b3c24a0ab429fb3d7e3680a2a689a8eddb2c2aaf826b513fec55dcd70cdf35ea.json @@ -0,0 +1,260 @@ +{ + "test_id": "tests/integration/agents/test_openai_responses.py::test_response_with_instructions[txt=ollama/llama3.2:3b-instruct-fp16]", + "request": { + "method": "POST", + "url": "http://0.0.0.0:11434/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "llama3.2:3b-instruct-fp16", + "messages": [ + { + "role": "system", + "content": "You are a helpful assistant." + }, + { + "role": "user", + "content": "What is the capital of France?" + } + ], + "stream": true, + "stream_options": { + "include_usage": true + } + }, + "endpoint": "/v1/chat/completions", + "model": "llama3.2:3b-instruct-fp16" + }, + "response": { + "body": [ + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b3c24a0ab429", + "choices": [ + { + "delta": { + "content": "The", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b3c24a0ab429", + "choices": [ + { + "delta": { + "content": " capital", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b3c24a0ab429", + "choices": [ + { + "delta": { + "content": " of", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b3c24a0ab429", + "choices": [ + { + "delta": { + "content": " France", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": 
"rec-b3c24a0ab429", + "choices": [ + { + "delta": { + "content": " is", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b3c24a0ab429", + "choices": [ + { + "delta": { + "content": " Paris", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b3c24a0ab429", + "choices": [ + { + "delta": { + "content": ".", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b3c24a0ab429", + "choices": [ + { + "delta": { + "content": "", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": "stop", + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b3c24a0ab429", + "choices": [], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": { + "completion_tokens": 8, + "prompt_tokens": 38, + "total_tokens": 46, + "completion_tokens_details": null, + "prompt_tokens_details": null + } + } + } + ], + "is_streaming": true + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/agents/recordings/b4a47451a2af579b9dfb4a60bacaee0f274fc53b263c25fe5e9e4bc23739f3db.json b/tests/integration/agents/recordings/b4a47451a2af579b9dfb4a60bacaee0f274fc53b263c25fe5e9e4bc23739f3db.json new file mode 100644 index 000000000..50e14c9fc --- /dev/null +++ b/tests/integration/agents/recordings/b4a47451a2af579b9dfb4a60bacaee0f274fc53b263c25fe5e9e4bc23739f3db.json @@ -0,0 +1,442 @@ +{ + "test_id": "tests/integration/agents/test_agents.py::test_tool_choice_required[ollama/llama3.2:3b-instruct-fp16]", + "request": { + "method": "POST", + "url": "http://0.0.0.0:11434/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "llama3.2:3b-instruct-fp16", + "messages": [ + { + "role": "system", + "content": "You are a helpful assistant" + }, + { + "role": "user", + "content": "What is the boiling point of the liquid polyjuice in celsius?" 
+ }, + { + "role": "assistant", + "content": "", + "tool_calls": [ + { + "id": "call_z1rt0qb1", + "type": "function", + "function": { + "name": "get_boiling_point", + "arguments": "{\"celcius\":true,\"liquid_name\":\"polyjuice\"}" + } + } + ] + }, + { + "role": "tool", + "tool_call_id": "call_z1rt0qb1", + "content": "-100" + } + ], + "max_tokens": 512, + "stream": true, + "temperature": 0.0001, + "tool_choice": "required", + "tools": [ + { + "type": "function", + "function": { + "name": "get_boiling_point", + "description": "Returns the boiling point of a liquid in Celcius or Fahrenheit.", + "parameters": { + "type": "object", + "properties": { + "liquid_name": { + "type": "string", + "description": "The name of the liquid" + }, + "celcius": { + "type": "boolean", + "description": "Whether to return the boiling point in Celcius" + } + }, + "required": [ + "liquid_name" + ] + } + } + } + ], + "top_p": 0.9 + }, + "endpoint": "/v1/chat/completions", + "model": "llama3.2:3b-instruct-fp16" + }, + "response": { + "body": [ + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b4a47451a2af", + "choices": [ + { + "delta": { + "content": "The", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b4a47451a2af", + "choices": [ + { + "delta": { + "content": " boiling", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b4a47451a2af", + "choices": [ + { + "delta": { + "content": " point", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b4a47451a2af", + "choices": [ + { + "delta": { + "content": " of", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b4a47451a2af", + "choices": [ + { + "delta": { + "content": " liquid", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + 
"service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b4a47451a2af", + "choices": [ + { + "delta": { + "content": " poly", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b4a47451a2af", + "choices": [ + { + "delta": { + "content": "ju", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b4a47451a2af", + "choices": [ + { + "delta": { + "content": "ice", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b4a47451a2af", + "choices": [ + { + "delta": { + "content": " is", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b4a47451a2af", + "choices": [ + { + "delta": { + "content": " -", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b4a47451a2af", + "choices": [ + { + "delta": { + "content": "100", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b4a47451a2af", + "choices": [ + { + "delta": { + "content": "\u00b0C", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": 
"llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b4a47451a2af", + "choices": [ + { + "delta": { + "content": ".", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b4a47451a2af", + "choices": [ + { + "delta": { + "content": "", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": "stop", + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + } + ], + "is_streaming": true + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/agents/recordings/bcb50763cac6527944653e77df1f2cc9e9481ffae9cff5693b2f669270c9c0a7.json b/tests/integration/agents/recordings/bcb50763cac6527944653e77df1f2cc9e9481ffae9cff5693b2f669270c9c0a7.json index c7ecef75f..4527ab5cd 100644 --- a/tests/integration/agents/recordings/bcb50763cac6527944653e77df1f2cc9e9481ffae9cff5693b2f669270c9c0a7.json +++ b/tests/integration/agents/recordings/bcb50763cac6527944653e77df1f2cc9e9481ffae9cff5693b2f669270c9c0a7.json @@ -66,7 +66,7 @@ "tool_calls": [ { "index": 0, - "id": "call_qryqpevz", + "id": "call_9x4z21g1", "function": { "arguments": "{\"celcius\":\"true\",\"liquid_name\":\"polyjuice\"}", "name": "get_boiling_point" @@ -116,5 +116,6 @@ } ], "is_streaming": true - } + }, + "id_normalization_mapping": {} } diff --git a/tests/integration/agents/recordings/c97c102959ea8c64a43e4c752bf5e979c7a298fdbeedec153954ce817da7e3e7.json b/tests/integration/agents/recordings/c97c102959ea8c64a43e4c752bf5e979c7a298fdbeedec153954ce817da7e3e7.json index e3f54171f..a1332fddb 100644 --- a/tests/integration/agents/recordings/c97c102959ea8c64a43e4c752bf5e979c7a298fdbeedec153954ce817da7e3e7.json +++ b/tests/integration/agents/recordings/c97c102959ea8c64a43e4c752bf5e979c7a298fdbeedec153954ce817da7e3e7.json @@ -54,5 +54,6 @@ } }, "is_streaming": false - } + }, + "id_normalization_mapping": {} } diff --git a/tests/integration/agents/recordings/ca95f47c2896a7ce1536a3cf1a78170e073e49e478658a4b098b3581a703e843.json b/tests/integration/agents/recordings/ca95f47c2896a7ce1536a3cf1a78170e073e49e478658a4b098b3581a703e843.json index 145596d38..64b28de5c 100644 --- a/tests/integration/agents/recordings/ca95f47c2896a7ce1536a3cf1a78170e073e49e478658a4b098b3581a703e843.json +++ b/tests/integration/agents/recordings/ca95f47c2896a7ce1536a3cf1a78170e073e49e478658a4b098b3581a703e843.json @@ -54,5 +54,6 @@ } }, "is_streaming": false - } + }, + "id_normalization_mapping": {} } diff --git a/tests/integration/agents/recordings/d35fc2ef48595f5d0afe8fb617c8df864a155017e8d4c5d2e2b2c51e9cfaed5e.json b/tests/integration/agents/recordings/d35fc2ef48595f5d0afe8fb617c8df864a155017e8d4c5d2e2b2c51e9cfaed5e.json index a333490a4..ae1be7520 100644 --- a/tests/integration/agents/recordings/d35fc2ef48595f5d0afe8fb617c8df864a155017e8d4c5d2e2b2c51e9cfaed5e.json 
+++ b/tests/integration/agents/recordings/d35fc2ef48595f5d0afe8fb617c8df864a155017e8d4c5d2e2b2c51e9cfaed5e.json @@ -54,5 +54,6 @@ } }, "is_streaming": false - } + }, + "id_normalization_mapping": {} } diff --git a/tests/integration/agents/recordings/da6fc54bb65dd1f83e577109b8228b644e76999ebb2fe8f09bead3dee56a6046.json b/tests/integration/agents/recordings/da6fc54bb65dd1f83e577109b8228b644e76999ebb2fe8f09bead3dee56a6046.json new file mode 100644 index 000000000..f17ae2ae3 --- /dev/null +++ b/tests/integration/agents/recordings/da6fc54bb65dd1f83e577109b8228b644e76999ebb2fe8f09bead3dee56a6046.json @@ -0,0 +1,59 @@ +{ + "test_id": "tests/integration/agents/test_agents.py::test_custom_tool[ollama/llama3.2:3b-instruct-fp16]", + "request": { + "method": "POST", + "url": "http://0.0.0.0:11434/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "llama-guard3:1b", + "messages": [ + { + "role": "user", + "content": "Task: Check if there is unsafe content in 'Assistant' messages in conversations according our safety policy with the below categories.\n\n\nS1: Violent Crimes.\nS2: Non-Violent Crimes.\nS3: Sex Crimes.\nS4: Child Exploitation.\nS5: Defamation.\nS6: Specialized Advice.\nS7: Privacy.\nS8: Intellectual Property.\nS9: Indiscriminate Weapons.\nS10: Hate.\nS11: Self-Harm.\nS12: Sexual Content.\nS13: Elections.\nS14: Code Interpreter Abuse.\n\n\n\n\nUser: What is the boiling point of the liquid polyjuice in celsius?\n\nAssistant: \n\nTool: -100\n\nAssistant: The boiling point of liquid polyjuice is -100\u00b0C.\n\n\n\nProvide your safety assessment for ONLY THE LAST Assistant message in the above conversation:\n - First line must read 'safe' or 'unsafe'.\n - If unsafe, a second line must include a comma-separated list of violated categories." 
+ } + ], + "stream": false, + "temperature": 0.0 + }, + "endpoint": "/v1/chat/completions", + "model": "llama-guard3:1b" + }, + "response": { + "body": { + "__type__": "openai.types.chat.chat_completion.ChatCompletion", + "__data__": { + "id": "rec-da6fc54bb65d", + "choices": [ + { + "finish_reason": "stop", + "index": 0, + "logprobs": null, + "message": { + "content": "safe", + "refusal": null, + "role": "assistant", + "annotations": null, + "audio": null, + "function_call": null, + "tool_calls": null + } + } + ], + "created": 0, + "model": "llama-guard3:1b", + "object": "chat.completion", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": { + "completion_tokens": 2, + "prompt_tokens": 421, + "total_tokens": 423, + "completion_tokens_details": null, + "prompt_tokens_details": null + } + } + }, + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/agents/recordings/db5c89b87eba0d129ad9ed17306d4016aeeaf2bbeeaa5643d9620f5ea484430e.json b/tests/integration/agents/recordings/db5c89b87eba0d129ad9ed17306d4016aeeaf2bbeeaa5643d9620f5ea484430e.json index da06f3968..55e71cf27 100644 --- a/tests/integration/agents/recordings/db5c89b87eba0d129ad9ed17306d4016aeeaf2bbeeaa5643d9620f5ea484430e.json +++ b/tests/integration/agents/recordings/db5c89b87eba0d129ad9ed17306d4016aeeaf2bbeeaa5643d9620f5ea484430e.json @@ -71,7 +71,7 @@ "tool_calls": [ { "index": 0, - "id": "call_ur5tbdbt", + "id": "call_5qverjg6", "function": { "arguments": "{\"celcius\":true,\"liquid_name\":\"polyjuice\"}", "name": "get_boiling_point" @@ -121,5 +121,6 @@ } ], "is_streaming": true - } + }, + "id_normalization_mapping": {} } diff --git a/tests/integration/agents/recordings/ed76dd5fdf892c9cc959b2d301a256f81c43a906a0a56684ca97e848f8d6a94c.json b/tests/integration/agents/recordings/ed76dd5fdf892c9cc959b2d301a256f81c43a906a0a56684ca97e848f8d6a94c.json index cb2afc5ed..06d8a4305 100644 --- a/tests/integration/agents/recordings/ed76dd5fdf892c9cc959b2d301a256f81c43a906a0a56684ca97e848f8d6a94c.json +++ b/tests/integration/agents/recordings/ed76dd5fdf892c9cc959b2d301a256f81c43a906a0a56684ca97e848f8d6a94c.json @@ -54,5 +54,6 @@ } }, "is_streaming": false - } + }, + "id_normalization_mapping": {} } diff --git a/tests/integration/agents/recordings/f85c3c14185386eecd4939eeb6b3a3cee734d69beb7cd6d13a3d3c2c64eca734.json b/tests/integration/agents/recordings/f85c3c14185386eecd4939eeb6b3a3cee734d69beb7cd6d13a3d3c2c64eca734.json index 2e1e9f4e5..dbb70df6c 100644 --- a/tests/integration/agents/recordings/f85c3c14185386eecd4939eeb6b3a3cee734d69beb7cd6d13a3d3c2c64eca734.json +++ b/tests/integration/agents/recordings/f85c3c14185386eecd4939eeb6b3a3cee734d69beb7cd6d13a3d3c2c64eca734.json @@ -66,7 +66,7 @@ "tool_calls": [ { "index": 0, - "id": "call_rq1pcgq7", + "id": "call_z1rt0qb1", "function": { "arguments": "{\"celcius\":true,\"liquid_name\":\"polyjuice\"}", "name": "get_boiling_point" @@ -116,5 +116,6 @@ } ], "is_streaming": true - } + }, + "id_normalization_mapping": {} } diff --git a/tests/integration/agents/test_openai_responses.py b/tests/integration/agents/test_openai_responses.py index 675e2b904..d413d5201 100644 --- a/tests/integration/agents/test_openai_responses.py +++ b/tests/integration/agents/test_openai_responses.py @@ -466,3 +466,53 @@ def test_guardrails_with_tools(compat_client, text_model_id): # Response should be either a function call or a message output_type = response.output[0].type assert output_type in ["function_call", "message"] + + +def 
test_response_with_instructions(openai_client, client_with_models, text_model_id): + """Test the instructions parameter in the responses object.""" + if isinstance(client_with_models, LlamaStackAsLibraryClient): + pytest.skip("OpenAI responses are not supported when testing with library client yet.") + + client = openai_client + + messages = [ + { + "role": "user", + "content": "What is the capital of France?", + } + ] + + # First, create a response without the instructions parameter + response_w_o_instructions = client.responses.create( + model=text_model_id, + input=messages, + stream=False, + ) + + # Verify the instructions field is None + assert response_w_o_instructions.instructions is None + + # Next, create a response and pass the instructions parameter + instructions = "You are a helpful assistant." + response_with_instructions = client.responses.create( + model=text_model_id, + instructions=instructions, + input=messages, + stream=False, + ) + + # Verify we have a valid instructions field + assert response_with_instructions.instructions == instructions + + # Finally, test the instructions parameter with a previous response id + instructions2 = "You are a helpful assistant and speak in pirate language." + response_with_instructions2 = client.responses.create( + model=text_model_id, + instructions=instructions2, + input=messages, + previous_response_id=response_with_instructions.id, + stream=False, + ) + + # Verify instructions from the previous response were not carried over to the next response + assert response_with_instructions2.instructions == instructions2 diff --git a/tests/integration/common/recordings/models-64a2277c90f0f42576f60c1030e3a020403d34a95f56931b792d5939f4cebc57-abd54ea0.json b/tests/integration/common/recordings/models-64a2277c90f0f42576f60c1030e3a020403d34a95f56931b792d5939f4cebc57-abd54ea0.json new file mode 100644 index 000000000..77e244a01 --- /dev/null +++ b/tests/integration/common/recordings/models-64a2277c90f0f42576f60c1030e3a020403d34a95f56931b792d5939f4cebc57-abd54ea0.json @@ -0,0 +1,44 @@ +{ + "test_id": null, + "request": { + "method": "POST", + "url": "http://0.0.0.0:11434/v1/v1/models", + "headers": {}, + "body": {}, + "endpoint": "/v1/models", + "model": "" + }, + "response": { + "body": [ + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "llama-guard3:1b", + "created": 1753937098, + "object": "model", + "owned_by": "library" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "all-minilm:l6-v2", + "created": 1753936935, + "object": "model", + "owned_by": "library" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "llama3.2:3b-instruct-fp16", + "created": 1753936925, + "object": "model", + "owned_by": "library" + } + } + ], + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index 3137de0de..041d10f10 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -37,6 +37,9 @@ def pytest_sessionstart(session): if "LLAMA_STACK_TEST_INFERENCE_MODE" not in os.environ: os.environ["LLAMA_STACK_TEST_INFERENCE_MODE"] = "replay" + if "LLAMA_STACK_LOGGING" not in os.environ: + os.environ["LLAMA_STACK_LOGGING"] = "all=warning" + if "SQLITE_STORE_DIR" not in os.environ: os.environ["SQLITE_STORE_DIR"] = tempfile.mkdtemp() @@ -317,3 +320,72 @@ def pytest_ignore_collect(path: str, config: pytest.Config) -> bool: if p.is_relative_to(rp): return False return True + + +def
get_vector_io_provider_ids(client): + """Get all available vector_io provider IDs.""" + providers = [p for p in client.providers.list() if p.api == "vector_io"] + return [p.provider_id for p in providers] + + +def vector_provider_wrapper(func): + """Decorator to run a test against all available vector_io providers.""" + import functools + import os + + @functools.wraps(func) + def wrapper(*args, **kwargs): + # Get the vector_io_provider_id from the test arguments + import inspect + + sig = inspect.signature(func) + bound_args = sig.bind(*args, **kwargs) + bound_args.apply_defaults() + + vector_io_provider_id = bound_args.arguments.get("vector_io_provider_id") + if not vector_io_provider_id: + pytest.skip("No vector_io_provider_id provided") + + # Get client_with_models to check available providers + client_with_models = bound_args.arguments.get("client_with_models") + if client_with_models: + available_providers = get_vector_io_provider_ids(client_with_models) + if vector_io_provider_id not in available_providers: + pytest.skip(f"Provider '{vector_io_provider_id}' not available. Available: {available_providers}") + + return func(*args, **kwargs) + + # For replay tests, only use providers that are available in ci-tests environment + if os.environ.get("LLAMA_STACK_TEST_INFERENCE_MODE") == "replay": + all_providers = ["faiss", "sqlite-vec"] + else: + # For live tests, try all providers (they'll skip if not available) + all_providers = [ + "faiss", + "sqlite-vec", + "milvus", + "chromadb", + "pgvector", + "weaviate", + "qdrant", + ] + + return pytest.mark.parametrize("vector_io_provider_id", all_providers)(wrapper) + + +@pytest.fixture +def vector_io_provider_id(request, client_with_models): + """Fixture that provides a specific vector_io provider ID, skipping if not available.""" + if hasattr(request, "param"): + requested_provider = request.param + available_providers = get_vector_io_provider_ids(client_with_models) + + if requested_provider not in available_providers: + pytest.skip(f"Provider '{requested_provider}' not available. 
Available: {available_providers}") + + return requested_provider + else: + provider_ids = get_vector_io_provider_ids(client_with_models) + if not provider_ids: + pytest.skip("No vector_io providers available") + return provider_ids[0] diff --git a/tests/integration/fixtures/common.py b/tests/integration/fixtures/common.py index 68a30fc69..6a9e1f3b2 100644 --- a/tests/integration/fixtures/common.py +++ b/tests/integration/fixtures/common.py @@ -21,6 +21,7 @@ from llama_stack_client import LlamaStackClient from openai import OpenAI from llama_stack import LlamaStackAsLibraryClient +from llama_stack.core.datatypes import VectorStoresConfig from llama_stack.core.stack import run_config_from_adhoc_config_spec from llama_stack.env import get_env_or_fail @@ -39,7 +40,7 @@ def is_port_available(port: int, host: str = "localhost") -> bool: def start_llama_stack_server(config_name: str) -> subprocess.Popen: """Start a llama stack server with the given config.""" - cmd = f"uv run --with llama-stack llama stack build --distro {config_name} --image-type venv --run" + cmd = f"uv run llama stack run {config_name}" devnull = open(os.devnull, "w") process = subprocess.Popen( shlex.split(cmd), @@ -236,9 +237,16 @@ def instantiate_llama_stack_client(session): if "=" in config: run_config = run_config_from_adhoc_config_spec(config) + + # --stack-config bypasses the distro template, so the default embedding model must be set here + if "vector_io" in config and "inference" in config: + run_config.vector_stores = VectorStoresConfig( + embedding_model_id="inline::sentence-transformers/nomic-ai/nomic-embed-text-v1.5" + ) + run_config_file = tempfile.NamedTemporaryFile(delete=False, suffix=".yaml") with open(run_config_file.name, "w") as f: - yaml.dump(run_config.model_dump(), f) + yaml.dump(run_config.model_dump(mode="json"), f) config = run_config_file.name client = LlamaStackAsLibraryClient( diff --git a/tests/integration/providers/utils/sqlstore/test_authorized_sqlstore.py b/tests/integration/providers/utils/sqlstore/test_authorized_sqlstore.py index 98bef0f2c..ad9115756 100644 --- a/tests/integration/providers/utils/sqlstore/test_authorized_sqlstore.py +++ b/tests/integration/providers/utils/sqlstore/test_authorized_sqlstore.py @@ -12,9 +12,15 @@ import pytest from llama_stack.core.access_control.access_control import default_policy from llama_stack.core.datatypes import User +from llama_stack.core.storage.datatypes import SqlStoreReference from llama_stack.providers.utils.sqlstore.api import ColumnType from llama_stack.providers.utils.sqlstore.authorized_sqlstore import AuthorizedSqlStore -from llama_stack.providers.utils.sqlstore.sqlstore import PostgresSqlStoreConfig, SqliteSqlStoreConfig, sqlstore_impl +from llama_stack.providers.utils.sqlstore.sqlstore import ( + PostgresSqlStoreConfig, + SqliteSqlStoreConfig, + register_sqlstore_backends, + sqlstore_impl, +) def get_postgres_config(): @@ -55,8 +61,9 @@ def authorized_store(backend_config): config_func = backend_config config = config_func() - - base_sqlstore = sqlstore_impl(config) + backend_name = f"sql_{type(config).__name__.lower()}" + register_sqlstore_backends({backend_name: config}) + base_sqlstore = sqlstore_impl(SqlStoreReference(backend=backend_name, table_name="authorized_store")) authorized_store = AuthorizedSqlStore(base_sqlstore, default_policy()) yield authorized_store diff --git a/tests/integration/test_persistence_integration.py b/tests/integration/test_persistence_integration.py new file mode 100644 index 000000000..e9b80dc0c --- /dev/null +++
b/tests/integration/test_persistence_integration.py @@ -0,0 +1,71 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +import yaml + +from llama_stack.core.datatypes import StackRunConfig +from llama_stack.core.storage.datatypes import ( + PostgresKVStoreConfig, + PostgresSqlStoreConfig, + SqliteKVStoreConfig, + SqliteSqlStoreConfig, +) + + +def test_starter_distribution_config_loads_and_resolves(): + """Integration: Actual starter config should parse and have correct storage structure.""" + with open("llama_stack/distributions/starter/run.yaml") as f: + config_dict = yaml.safe_load(f) + + config = StackRunConfig(**config_dict) + + # Config should have named backends and explicit store references + assert config.storage is not None + assert "kv_default" in config.storage.backends + assert "sql_default" in config.storage.backends + assert isinstance(config.storage.backends["kv_default"], SqliteKVStoreConfig) + assert isinstance(config.storage.backends["sql_default"], SqliteSqlStoreConfig) + + stores = config.storage.stores + assert stores.metadata is not None + assert stores.metadata.backend == "kv_default" + assert stores.metadata.namespace == "registry" + + assert stores.inference is not None + assert stores.inference.backend == "sql_default" + assert stores.inference.table_name == "inference_store" + assert stores.inference.max_write_queue_size > 0 + assert stores.inference.num_writers > 0 + + assert stores.conversations is not None + assert stores.conversations.backend == "sql_default" + assert stores.conversations.table_name == "openai_conversations" + + +def test_postgres_demo_distribution_config_loads(): + """Integration: Postgres demo should use Postgres backend for all stores.""" + with open("llama_stack/distributions/postgres-demo/run.yaml") as f: + config_dict = yaml.safe_load(f) + + config = StackRunConfig(**config_dict) + + # Should have postgres backend + assert config.storage is not None + assert "kv_default" in config.storage.backends + assert "sql_default" in config.storage.backends + postgres_backend = config.storage.backends["sql_default"] + assert isinstance(postgres_backend, PostgresSqlStoreConfig) + assert postgres_backend.host == "${env.POSTGRES_HOST:=localhost}" + + kv_backend = config.storage.backends["kv_default"] + assert isinstance(kv_backend, PostgresKVStoreConfig) + + stores = config.storage.stores + # Stores target the Postgres backends explicitly + assert stores.metadata is not None + assert stores.metadata.backend == "kv_default" + assert stores.inference is not None + assert stores.inference.backend == "sql_default" diff --git a/tests/integration/vector_io/test_openai_vector_stores.py b/tests/integration/vector_io/test_openai_vector_stores.py index e21b233bc..626faf42d 100644 --- a/tests/integration/vector_io/test_openai_vector_stores.py +++ b/tests/integration/vector_io/test_openai_vector_stores.py @@ -8,14 +8,15 @@ import time from io import BytesIO import pytest -from llama_stack_client import BadRequestError, NotFoundError +from llama_stack_client import BadRequestError from openai import BadRequestError as OpenAIBadRequestError -from openai import NotFoundError as OpenAINotFoundError from llama_stack.apis.vector_io import Chunk from llama_stack.core.library_client import LlamaStackAsLibraryClient from llama_stack.log import get_logger +from ..conftest import vector_provider_wrapper + logger = 
get_logger(name=__name__, category="vector_io") @@ -133,8 +134,9 @@ def compat_client_with_empty_stores(compat_client): clear_files() +@vector_provider_wrapper def test_openai_create_vector_store( - compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension + compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension, vector_io_provider_id ): """Test creating a vector store using OpenAI API.""" skip_if_provider_doesnt_support_openai_vector_stores(client_with_models) @@ -146,6 +148,7 @@ def test_openai_create_vector_store( metadata={"purpose": "testing", "environment": "integration"}, extra_body={ "embedding_model": embedding_model_id, + "provider_id": vector_io_provider_id, }, ) @@ -159,14 +162,18 @@ def test_openai_create_vector_store( assert hasattr(vector_store, "created_at") -def test_openai_create_vector_store_default(compat_client_with_empty_stores, client_with_models): +@vector_provider_wrapper +def test_openai_create_vector_store_default(compat_client_with_empty_stores, client_with_models, vector_io_provider_id): skip_if_provider_doesnt_support_openai_vector_stores(client_with_models) - vector_store = compat_client_with_empty_stores.vector_stores.create() + vector_store = compat_client_with_empty_stores.vector_stores.create( + extra_body={"provider_id": vector_io_provider_id} + ) assert vector_store.id +@vector_provider_wrapper def test_openai_list_vector_stores( - compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension + compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension, vector_io_provider_id ): """Test listing vector stores using OpenAI API.""" skip_if_provider_doesnt_support_openai_vector_stores(client_with_models) @@ -179,6 +186,7 @@ def test_openai_list_vector_stores( metadata={"type": "test"}, extra_body={ "embedding_model": embedding_model_id, + "provider_id": vector_io_provider_id, }, ) store2 = client.vector_stores.create( @@ -186,6 +194,7 @@ def test_openai_list_vector_stores( metadata={"type": "test"}, extra_body={ "embedding_model": embedding_model_id, + "provider_id": vector_io_provider_id, }, ) @@ -206,8 +215,9 @@ def test_openai_list_vector_stores( assert len(limited_response.data) == 1 +@vector_provider_wrapper def test_openai_retrieve_vector_store( - compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension + compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension, vector_io_provider_id ): """Test retrieving a specific vector store using OpenAI API.""" skip_if_provider_doesnt_support_openai_vector_stores(client_with_models) @@ -220,6 +230,7 @@ def test_openai_retrieve_vector_store( metadata={"purpose": "retrieval_test"}, extra_body={ "embedding_model": embedding_model_id, + "provider_id": vector_io_provider_id, }, ) @@ -233,8 +244,9 @@ def test_openai_retrieve_vector_store( assert retrieved_store.object == "vector_store" +@vector_provider_wrapper def test_openai_update_vector_store( - compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension + compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension, vector_io_provider_id ): """Test modifying a vector store using OpenAI API.""" skip_if_provider_doesnt_support_openai_vector_stores(client_with_models) @@ -247,6 +259,7 @@ def test_openai_update_vector_store( metadata={"version": "1.0"}, extra_body={ "embedding_model": 
embedding_model_id, + "provider_id": vector_io_provider_id, }, ) time.sleep(1) @@ -264,8 +277,9 @@ def test_openai_update_vector_store( assert modified_store.last_active_at > created_store.last_active_at +@vector_provider_wrapper def test_openai_delete_vector_store( - compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension + compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension, vector_io_provider_id ): """Test deleting a vector store using OpenAI API.""" skip_if_provider_doesnt_support_openai_vector_stores(client_with_models) @@ -278,6 +292,7 @@ def test_openai_delete_vector_store( metadata={"purpose": "deletion_test"}, extra_body={ "embedding_model": embedding_model_id, + "provider_id": vector_io_provider_id, }, ) @@ -294,8 +309,9 @@ def test_openai_delete_vector_store( client.vector_stores.retrieve(vector_store_id=created_store.id) +@vector_provider_wrapper def test_openai_vector_store_search_empty( - compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension + compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension, vector_io_provider_id ): """Test searching an empty vector store using OpenAI API.""" skip_if_provider_doesnt_support_openai_vector_stores(client_with_models) @@ -308,6 +324,7 @@ def test_openai_vector_store_search_empty( metadata={"purpose": "search_testing"}, extra_body={ "embedding_model": embedding_model_id, + "provider_id": vector_io_provider_id, }, ) @@ -323,8 +340,14 @@ def test_openai_vector_store_search_empty( assert search_response.has_more is False +@vector_provider_wrapper def test_openai_vector_store_with_chunks( - compat_client_with_empty_stores, client_with_models, sample_chunks, embedding_model_id, embedding_dimension + compat_client_with_empty_stores, + client_with_models, + sample_chunks, + embedding_model_id, + embedding_dimension, + vector_io_provider_id, ): """Test vector store functionality with actual chunks using both OpenAI and native APIs.""" skip_if_provider_doesnt_support_openai_vector_stores(client_with_models) @@ -338,6 +361,7 @@ def test_openai_vector_store_with_chunks( metadata={"purpose": "chunks_testing"}, extra_body={ "embedding_model": embedding_model_id, + "provider_id": vector_io_provider_id, }, ) @@ -380,6 +404,7 @@ def test_openai_vector_store_with_chunks( ("What inspires neural networks?", "doc4", "ai"), ], ) +@vector_provider_wrapper def test_openai_vector_store_search_relevance( compat_client_with_empty_stores, client_with_models, @@ -387,6 +412,7 @@ def test_openai_vector_store_search_relevance( test_case, embedding_model_id, embedding_dimension, + vector_io_provider_id, ): """Test that OpenAI vector store search returns relevant results for different queries.""" skip_if_provider_doesnt_support_openai_vector_stores(client_with_models) @@ -402,6 +428,7 @@ def test_openai_vector_store_search_relevance( metadata={"purpose": "relevance_testing"}, extra_body={ "embedding_model": embedding_model_id, + "provider_id": vector_io_provider_id, }, ) @@ -430,8 +457,14 @@ def test_openai_vector_store_search_relevance( assert top_result.score > 0 +@vector_provider_wrapper def test_openai_vector_store_search_with_ranking_options( - compat_client_with_empty_stores, client_with_models, sample_chunks, embedding_model_id, embedding_dimension + compat_client_with_empty_stores, + client_with_models, + sample_chunks, + embedding_model_id, + embedding_dimension, + vector_io_provider_id, ): """Test OpenAI 
vector store search with ranking options.""" skip_if_provider_doesnt_support_openai_vector_stores(client_with_models) @@ -445,6 +478,7 @@ def test_openai_vector_store_search_with_ranking_options( metadata={"purpose": "ranking_testing"}, extra_body={ "embedding_model": embedding_model_id, + "provider_id": vector_io_provider_id, }, ) @@ -483,8 +517,14 @@ def test_openai_vector_store_search_with_ranking_options( assert result.score >= threshold +@vector_provider_wrapper def test_openai_vector_store_search_with_high_score_filter( - compat_client_with_empty_stores, client_with_models, sample_chunks, embedding_model_id, embedding_dimension + compat_client_with_empty_stores, + client_with_models, + sample_chunks, + embedding_model_id, + embedding_dimension, + vector_io_provider_id, ): """Test that searching with text very similar to a document and high score threshold returns only that document.""" skip_if_provider_doesnt_support_openai_vector_stores(client_with_models) @@ -498,6 +538,7 @@ def test_openai_vector_store_search_with_high_score_filter( metadata={"purpose": "high_score_filtering"}, extra_body={ "embedding_model": embedding_model_id, + "provider_id": vector_io_provider_id, }, ) @@ -542,8 +583,14 @@ def test_openai_vector_store_search_with_high_score_filter( assert "python" in top_content.lower() or "programming" in top_content.lower() +@vector_provider_wrapper def test_openai_vector_store_search_with_max_num_results( - compat_client_with_empty_stores, client_with_models, sample_chunks, embedding_model_id, embedding_dimension + compat_client_with_empty_stores, + client_with_models, + sample_chunks, + embedding_model_id, + embedding_dimension, + vector_io_provider_id, ): """Test OpenAI vector store search with max_num_results.""" skip_if_provider_doesnt_support_openai_vector_stores(client_with_models) @@ -557,6 +604,7 @@ def test_openai_vector_store_search_with_max_num_results( metadata={"purpose": "max_num_results_testing"}, extra_body={ "embedding_model": embedding_model_id, + "provider_id": vector_io_provider_id, }, ) @@ -577,8 +625,9 @@ def test_openai_vector_store_search_with_max_num_results( assert len(search_response.data) == 2 +@vector_provider_wrapper def test_openai_vector_store_attach_file( - compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension + compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension, vector_io_provider_id ): """Test OpenAI vector store attach file.""" skip_if_provider_doesnt_support_openai_vector_stores(client_with_models) @@ -591,6 +640,7 @@ def test_openai_vector_store_attach_file( name="test_store", extra_body={ "embedding_model": embedding_model_id, + "provider_id": vector_io_provider_id, }, ) @@ -637,8 +687,9 @@ def test_openai_vector_store_attach_file( assert "foobazbar" in top_content.lower() +@vector_provider_wrapper def test_openai_vector_store_attach_files_on_creation( - compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension + compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension, vector_io_provider_id ): """Test OpenAI vector store attach files on creation.""" skip_if_provider_doesnt_support_openai_vector_stores(client_with_models) @@ -668,6 +719,7 @@ def test_openai_vector_store_attach_files_on_creation( file_ids=file_ids, extra_body={ "embedding_model": embedding_model_id, + "provider_id": vector_io_provider_id, }, ) @@ -700,8 +752,9 @@ def test_openai_vector_store_attach_files_on_creation( 
assert updated_vector_store.file_counts.failed == 0 +@vector_provider_wrapper def test_openai_vector_store_list_files( - compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension + compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension, vector_io_provider_id ): """Test OpenAI vector store list files.""" skip_if_provider_doesnt_support_openai_vector_stores(client_with_models) @@ -714,6 +767,7 @@ def test_openai_vector_store_list_files( name="test_store", extra_body={ "embedding_model": embedding_model_id, + "provider_id": vector_io_provider_id, }, ) @@ -773,8 +827,9 @@ def test_openai_vector_store_list_files( assert updated_vector_store.file_counts.in_progress == 0 +@vector_provider_wrapper def test_openai_vector_store_list_files_invalid_vector_store( - compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension + compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension, vector_io_provider_id ): """Test OpenAI vector store list files with invalid vector store ID.""" skip_if_provider_doesnt_support_openai_vector_stores(client_with_models) @@ -783,14 +838,15 @@ def test_openai_vector_store_list_files_invalid_vector_store( if isinstance(compat_client, LlamaStackAsLibraryClient): errors = ValueError else: - errors = (NotFoundError, OpenAINotFoundError) + errors = (BadRequestError, OpenAIBadRequestError) with pytest.raises(errors): compat_client.vector_stores.files.list(vector_store_id="abc123") +@vector_provider_wrapper def test_openai_vector_store_retrieve_file_contents( - compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension + compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension, vector_io_provider_id ): """Test OpenAI vector store retrieve file contents.""" skip_if_provider_doesnt_support_openai_vector_stores(client_with_models) @@ -803,6 +859,7 @@ def test_openai_vector_store_retrieve_file_contents( name="test_store", extra_body={ "embedding_model": embedding_model_id, + "provider_id": vector_io_provider_id, }, ) @@ -848,8 +905,9 @@ def test_openai_vector_store_retrieve_file_contents( assert file_contents.attributes == attributes +@vector_provider_wrapper def test_openai_vector_store_delete_file( - compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension + compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension, vector_io_provider_id ): """Test OpenAI vector store delete file.""" skip_if_provider_doesnt_support_openai_vector_stores(client_with_models) @@ -862,6 +920,7 @@ def test_openai_vector_store_delete_file( name="test_store", extra_body={ "embedding_model": embedding_model_id, + "provider_id": vector_io_provider_id, }, ) @@ -912,8 +971,9 @@ def test_openai_vector_store_delete_file( assert updated_vector_store.file_counts.in_progress == 0 +@vector_provider_wrapper def test_openai_vector_store_delete_file_removes_from_vector_store( - compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension + compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension, vector_io_provider_id ): """Test OpenAI vector store delete file removes from vector store.""" skip_if_provider_doesnt_support_openai_vector_stores(client_with_models) @@ -926,6 +986,7 @@ def test_openai_vector_store_delete_file_removes_from_vector_store( name="test_store", 
extra_body={ "embedding_model": embedding_model_id, + "provider_id": vector_io_provider_id, }, ) @@ -962,8 +1023,9 @@ def test_openai_vector_store_delete_file_removes_from_vector_store( assert not search_response.data +@vector_provider_wrapper def test_openai_vector_store_update_file( - compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension + compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension, vector_io_provider_id ): """Test OpenAI vector store update file.""" skip_if_provider_doesnt_support_openai_vector_stores(client_with_models) @@ -976,6 +1038,7 @@ def test_openai_vector_store_update_file( name="test_store", extra_body={ "embedding_model": embedding_model_id, + "provider_id": vector_io_provider_id, }, ) @@ -1017,8 +1080,9 @@ def test_openai_vector_store_update_file( assert retrieved_file.attributes["foo"] == "baz" +@vector_provider_wrapper def test_create_vector_store_files_duplicate_vector_store_name( - compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension + compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension, vector_io_provider_id ): """ This test confirms that client.vector_stores.create() creates a unique ID @@ -1044,6 +1108,7 @@ def test_create_vector_store_files_duplicate_vector_store_name( name="test_store_with_files", extra_body={ "embedding_model": embedding_model_id, + "provider_id": vector_io_provider_id, }, ) assert vector_store.file_counts.completed == 0 @@ -1056,6 +1121,7 @@ def test_create_vector_store_files_duplicate_vector_store_name( name="test_store_with_files", extra_body={ "embedding_model": embedding_model_id, + "provider_id": vector_io_provider_id, }, ) @@ -1086,8 +1152,15 @@ def test_create_vector_store_files_duplicate_vector_store_name( @pytest.mark.parametrize("search_mode", ["vector", "keyword", "hybrid"]) +@vector_provider_wrapper def test_openai_vector_store_search_modes( - llama_stack_client, client_with_models, sample_chunks, search_mode, embedding_model_id, embedding_dimension + llama_stack_client, + client_with_models, + sample_chunks, + search_mode, + embedding_model_id, + embedding_dimension, + vector_io_provider_id, ): skip_if_provider_doesnt_support_openai_vector_stores(client_with_models) skip_if_provider_doesnt_support_openai_vector_stores_search(client_with_models, search_mode) @@ -1097,6 +1170,7 @@ def test_openai_vector_store_search_modes( metadata={"purpose": "search_mode_testing"}, extra_body={ "embedding_model": embedding_model_id, + "provider_id": vector_io_provider_id, }, ) @@ -1115,8 +1189,9 @@ def test_openai_vector_store_search_modes( assert search_response is not None +@vector_provider_wrapper def test_openai_vector_store_file_batch_create_and_retrieve( - compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension + compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension, vector_io_provider_id ): """Test creating and retrieving a vector store file batch.""" skip_if_provider_doesnt_support_openai_vector_stores(client_with_models) @@ -1128,6 +1203,7 @@ def test_openai_vector_store_file_batch_create_and_retrieve( name="batch_test_store", extra_body={ "embedding_model": embedding_model_id, + "provider_id": vector_io_provider_id, }, ) @@ -1178,8 +1254,9 @@ def test_openai_vector_store_file_batch_create_and_retrieve( assert retrieved_batch.status == "completed" # Should be completed after processing 
+@vector_provider_wrapper def test_openai_vector_store_file_batch_list_files( - compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension + compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension, vector_io_provider_id ): """Test listing files in a vector store file batch.""" skip_if_provider_doesnt_support_openai_vector_stores(client_with_models) @@ -1191,6 +1268,7 @@ def test_openai_vector_store_file_batch_list_files( name="batch_list_test_store", extra_body={ "embedding_model": embedding_model_id, + "provider_id": vector_io_provider_id, }, ) @@ -1271,8 +1349,9 @@ def test_openai_vector_store_file_batch_list_files( assert first_page_ids.isdisjoint(second_page_ids) +@vector_provider_wrapper def test_openai_vector_store_file_batch_cancel( - compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension + compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension, vector_io_provider_id ): """Test cancelling a vector store file batch.""" skip_if_provider_doesnt_support_openai_vector_stores(client_with_models) @@ -1284,6 +1363,7 @@ def test_openai_vector_store_file_batch_cancel( name="batch_cancel_test_store", extra_body={ "embedding_model": embedding_model_id, + "provider_id": vector_io_provider_id, }, ) @@ -1326,8 +1406,9 @@ def test_openai_vector_store_file_batch_cancel( assert final_batch.status in ["completed", "cancelled"] +@vector_provider_wrapper def test_openai_vector_store_file_batch_retrieve_contents( - compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension + compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension, vector_io_provider_id ): """Test retrieving file contents after file batch processing.""" skip_if_provider_doesnt_support_openai_vector_stores(client_with_models) @@ -1339,6 +1420,7 @@ def test_openai_vector_store_file_batch_retrieve_contents( name="batch_contents_test_store", extra_body={ "embedding_model": embedding_model_id, + "provider_id": vector_io_provider_id, }, ) @@ -1399,8 +1481,9 @@ def test_openai_vector_store_file_batch_retrieve_contents( assert file_data[i][1].decode("utf-8") in content_text +@vector_provider_wrapper def test_openai_vector_store_file_batch_error_handling( - compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension + compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension, vector_io_provider_id ): """Test error handling for file batch operations.""" skip_if_provider_doesnt_support_openai_vector_stores(client_with_models) @@ -1412,6 +1495,7 @@ def test_openai_vector_store_file_batch_error_handling( name="batch_error_test_store", extra_body={ "embedding_model": embedding_model_id, + "provider_id": vector_io_provider_id, }, ) @@ -1443,11 +1527,11 @@ def test_openai_vector_store_file_batch_error_handling( batch_id="non_existent_batch_id", ) - # Test operations on non-existent vector store (returns NotFoundError) + # Test operations on non-existent vector store (returns BadRequestError) if isinstance(compat_client, LlamaStackAsLibraryClient): vector_store_errors = ValueError else: - vector_store_errors = (NotFoundError, OpenAINotFoundError) + vector_store_errors = (BadRequestError, OpenAIBadRequestError) with pytest.raises(vector_store_errors): # Should raise an error for non-existent vector store compat_client.vector_stores.file_batches.create( @@ -1456,8 +1540,9 @@ 
def test_openai_vector_store_file_batch_error_handling( ) +@vector_provider_wrapper def test_openai_vector_store_embedding_config_from_metadata( - compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension + compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension, vector_io_provider_id ): """Test that embedding configuration works from metadata source.""" skip_if_provider_doesnt_support_openai_vector_stores(client_with_models) @@ -1471,6 +1556,9 @@ def test_openai_vector_store_embedding_config_from_metadata( "embedding_dimension": str(embedding_dimension), "test_source": "metadata", }, + extra_body={ + "provider_id": vector_io_provider_id, + }, ) assert vector_store_metadata is not None @@ -1489,6 +1577,7 @@ def test_openai_vector_store_embedding_config_from_metadata( extra_body={ "embedding_model": embedding_model_id, "embedding_dimension": int(embedding_dimension), # Ensure same type/value + "provider_id": vector_io_provider_id, }, ) diff --git a/tests/integration/vector_io/test_vector_io.py b/tests/integration/vector_io/test_vector_io.py index 653299338..1f67ddb24 100644 --- a/tests/integration/vector_io/test_vector_io.py +++ b/tests/integration/vector_io/test_vector_io.py @@ -8,6 +8,8 @@ import pytest from llama_stack.apis.vector_io import Chunk +from ..conftest import vector_provider_wrapper + @pytest.fixture(scope="session") def sample_chunks(): @@ -46,45 +48,51 @@ def client_with_empty_registry(client_with_models): clear_registry() -def test_vector_db_retrieve(client_with_empty_registry, embedding_model_id, embedding_dimension): - vector_db_name = "test_vector_db" +@vector_provider_wrapper +def test_vector_store_retrieve( + client_with_empty_registry, embedding_model_id, embedding_dimension, vector_io_provider_id +): + vector_store_name = "test_vector_store" create_response = client_with_empty_registry.vector_stores.create( - name=vector_db_name, + name=vector_store_name, extra_body={ - "embedding_model": embedding_model_id, + "provider_id": vector_io_provider_id, }, ) - actual_vector_db_id = create_response.id + actual_vector_store_id = create_response.id # Retrieve the vector store and validate its properties - response = client_with_empty_registry.vector_stores.retrieve(vector_store_id=actual_vector_db_id) + response = client_with_empty_registry.vector_stores.retrieve(vector_store_id=actual_vector_store_id) assert response is not None - assert response.id == actual_vector_db_id - assert response.name == vector_db_name + assert response.id == actual_vector_store_id + assert response.name == vector_store_name assert response.id.startswith("vs_") -def test_vector_db_register(client_with_empty_registry, embedding_model_id, embedding_dimension): - vector_db_name = "test_vector_db" +@vector_provider_wrapper +def test_vector_store_register( + client_with_empty_registry, embedding_model_id, embedding_dimension, vector_io_provider_id +): + vector_store_name = "test_vector_store" response = client_with_empty_registry.vector_stores.create( - name=vector_db_name, + name=vector_store_name, extra_body={ - "embedding_model": embedding_model_id, + "provider_id": vector_io_provider_id, }, ) - actual_vector_db_id = response.id - assert actual_vector_db_id.startswith("vs_") - assert actual_vector_db_id != vector_db_name + actual_vector_store_id = response.id + assert actual_vector_store_id.startswith("vs_") + assert actual_vector_store_id != vector_store_name vector_stores = client_with_empty_registry.vector_stores.list() assert 
len(vector_stores.data) == 1 vector_store = vector_stores.data[0] - assert vector_store.id == actual_vector_db_id - assert vector_store.name == vector_db_name + assert vector_store.id == actual_vector_store_id + assert vector_store.name == vector_store_name - client_with_empty_registry.vector_stores.delete(vector_store_id=actual_vector_db_id) + client_with_empty_registry.vector_stores.delete(vector_store_id=actual_vector_store_id) vector_stores = client_with_empty_registry.vector_stores.list() assert len(vector_stores.data) == 0 @@ -100,24 +108,27 @@ def test_vector_db_register(client_with_empty_registry, embedding_model_id, embe ("How does machine learning improve over time?", "doc2"), ], ) -def test_insert_chunks(client_with_empty_registry, embedding_model_id, embedding_dimension, sample_chunks, test_case): - vector_db_name = "test_vector_db" +@vector_provider_wrapper +def test_insert_chunks( + client_with_empty_registry, embedding_model_id, embedding_dimension, sample_chunks, test_case, vector_io_provider_id +): + vector_store_name = "test_vector_store" create_response = client_with_empty_registry.vector_stores.create( - name=vector_db_name, + name=vector_store_name, extra_body={ - "embedding_model": embedding_model_id, + "provider_id": vector_io_provider_id, }, ) - actual_vector_db_id = create_response.id + actual_vector_store_id = create_response.id client_with_empty_registry.vector_io.insert( - vector_db_id=actual_vector_db_id, + vector_db_id=actual_vector_store_id, chunks=sample_chunks, ) response = client_with_empty_registry.vector_io.query( - vector_db_id=actual_vector_db_id, + vector_db_id=actual_vector_store_id, query="What is the capital of France?", ) assert response is not None @@ -126,7 +137,7 @@ def test_insert_chunks(client_with_empty_registry, embedding_model_id, embedding query, expected_doc_id = test_case response = client_with_empty_registry.vector_io.query( - vector_db_id=actual_vector_db_id, + vector_db_id=actual_vector_store_id, query=query, ) assert response is not None @@ -135,21 +146,24 @@ def test_insert_chunks(client_with_empty_registry, embedding_model_id, embedding assert top_match.metadata["document_id"] == expected_doc_id, f"Query '{query}' should match {expected_doc_id}" -def test_insert_chunks_with_precomputed_embeddings(client_with_empty_registry, embedding_model_id, embedding_dimension): +@vector_provider_wrapper +def test_insert_chunks_with_precomputed_embeddings( + client_with_empty_registry, embedding_model_id, embedding_dimension, vector_io_provider_id +): vector_io_provider_params_dict = { "inline::milvus": {"score_threshold": -1.0}, "inline::qdrant": {"score_threshold": -1.0}, "remote::qdrant": {"score_threshold": -1.0}, } - vector_db_name = "test_precomputed_embeddings_db" + vector_store_name = "test_precomputed_embeddings_db" register_response = client_with_empty_registry.vector_stores.create( - name=vector_db_name, + name=vector_store_name, extra_body={ - "embedding_model": embedding_model_id, + "provider_id": vector_io_provider_id, }, ) - actual_vector_db_id = register_response.id + actual_vector_store_id = register_response.id chunks_with_embeddings = [ Chunk( @@ -160,13 +174,13 @@ def test_insert_chunks_with_precomputed_embeddings(client_with_empty_registry, e ] client_with_empty_registry.vector_io.insert( - vector_db_id=actual_vector_db_id, + vector_db_id=actual_vector_store_id, chunks=chunks_with_embeddings, ) provider = [p.provider_id for p in client_with_empty_registry.providers.list() if p.api == "vector_io"][0] response = 
client_with_empty_registry.vector_io.query( - vector_db_id=actual_vector_db_id, + vector_db_id=actual_vector_store_id, query="precomputed embedding test", params=vector_io_provider_params_dict.get(provider, None), ) @@ -181,23 +195,25 @@ def test_insert_chunks_with_precomputed_embeddings(client_with_empty_registry, e # expect this test to fail +@vector_provider_wrapper def test_query_returns_valid_object_when_identical_to_embedding_in_vdb( - client_with_empty_registry, embedding_model_id, embedding_dimension + client_with_empty_registry, embedding_model_id, embedding_dimension, vector_io_provider_id ): vector_io_provider_params_dict = { "inline::milvus": {"score_threshold": 0.0}, "remote::qdrant": {"score_threshold": 0.0}, "inline::qdrant": {"score_threshold": 0.0}, } - vector_db_name = "test_precomputed_embeddings_db" + vector_store_name = "test_precomputed_embeddings_db" register_response = client_with_empty_registry.vector_stores.create( - name=vector_db_name, + name=vector_store_name, extra_body={ "embedding_model": embedding_model_id, + "provider_id": vector_io_provider_id, }, ) - actual_vector_db_id = register_response.id + actual_vector_store_id = register_response.id chunks_with_embeddings = [ Chunk( @@ -208,13 +224,13 @@ def test_query_returns_valid_object_when_identical_to_embedding_in_vdb( ] client_with_empty_registry.vector_io.insert( - vector_db_id=actual_vector_db_id, + vector_db_id=actual_vector_store_id, chunks=chunks_with_embeddings, ) provider = [p.provider_id for p in client_with_empty_registry.providers.list() if p.api == "vector_io"][0] response = client_with_empty_registry.vector_io.query( - vector_db_id=actual_vector_db_id, + vector_db_id=actual_vector_store_id, query="duplicate", params=vector_io_provider_params_dict.get(provider, None), ) @@ -226,33 +242,44 @@ def test_query_returns_valid_object_when_identical_to_embedding_in_vdb( assert response.chunks[0].metadata["source"] == "precomputed" -def test_auto_extract_embedding_dimension(client_with_empty_registry, embedding_model_id): +@vector_provider_wrapper +def test_auto_extract_embedding_dimension( + client_with_empty_registry, embedding_model_id, embedding_dimension, vector_io_provider_id +): + # This test specifically tests embedding model override, so we keep embedding_model vs = client_with_empty_registry.vector_stores.create( - name="test_auto_extract", extra_body={"embedding_model": embedding_model_id} + name="test_auto_extract", + extra_body={"embedding_model": embedding_model_id, "provider_id": vector_io_provider_id}, ) assert vs.id is not None -def test_provider_auto_selection_single_provider(client_with_empty_registry, embedding_model_id): +@vector_provider_wrapper +def test_provider_auto_selection_single_provider( + client_with_empty_registry, embedding_model_id, embedding_dimension, vector_io_provider_id +): providers = [p for p in client_with_empty_registry.providers.list() if p.api == "vector_io"] if len(providers) != 1: pytest.skip(f"Test requires exactly one vector_io provider, found {len(providers)}") - vs = client_with_empty_registry.vector_stores.create( - name="test_auto_provider", extra_body={"embedding_model": embedding_model_id} - ) + # Test that when only one provider is available, it's auto-selected (no provider_id needed) + vs = client_with_empty_registry.vector_stores.create(name="test_auto_provider") assert vs.id is not None -def test_provider_id_override(client_with_empty_registry, embedding_model_id): +@vector_provider_wrapper +def test_provider_id_override( + 
client_with_empty_registry, embedding_model_id, embedding_dimension, vector_io_provider_id +): providers = [p for p in client_with_empty_registry.providers.list() if p.api == "vector_io"] if len(providers) != 1: pytest.skip(f"Test requires exactly one vector_io provider, found {len(providers)}") provider_id = providers[0].provider_id + # Test explicit provider_id specification (using default embedding model) vs = client_with_empty_registry.vector_stores.create( - name="test_provider_override", extra_body={"embedding_model": embedding_model_id, "provider_id": provider_id} + name="test_provider_override", extra_body={"provider_id": provider_id} ) assert vs.id is not None assert vs.metadata.get("provider_id") == provider_id diff --git a/tests/unit/cli/test_stack_config.py b/tests/unit/cli/test_stack_config.py index daaf229e5..7b9f3ca0c 100644 --- a/tests/unit/cli/test_stack_config.py +++ b/tests/unit/cli/test_stack_config.py @@ -23,6 +23,27 @@ def config_with_image_name_int(): image_name: 1234 apis_to_serve: [] built_at: {datetime.now().isoformat()} + storage: + backends: + kv_default: + type: kv_sqlite + db_path: /tmp/test_kv.db + sql_default: + type: sql_sqlite + db_path: /tmp/test_sql.db + stores: + metadata: + backend: kv_default + namespace: metadata + inference: + backend: sql_default + table_name: inference + conversations: + backend: sql_default + table_name: conversations + responses: + backend: sql_default + table_name: responses providers: inference: - provider_id: provider1 @@ -54,6 +75,27 @@ def up_to_date_config(): image_name: foo apis_to_serve: [] built_at: {datetime.now().isoformat()} + storage: + backends: + kv_default: + type: kv_sqlite + db_path: /tmp/test_kv.db + sql_default: + type: sql_sqlite + db_path: /tmp/test_sql.db + stores: + metadata: + backend: kv_default + namespace: metadata + inference: + backend: sql_default + table_name: inference + conversations: + backend: sql_default + table_name: conversations + responses: + backend: sql_default + table_name: responses providers: inference: - provider_id: provider1 diff --git a/tests/unit/conversations/test_conversations.py b/tests/unit/conversations/test_conversations.py index 65c3e2333..ff6dd243d 100644 --- a/tests/unit/conversations/test_conversations.py +++ b/tests/unit/conversations/test_conversations.py @@ -20,7 +20,14 @@ from llama_stack.core.conversations.conversations import ( ConversationServiceConfig, ConversationServiceImpl, ) -from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig +from llama_stack.core.datatypes import StackRunConfig +from llama_stack.core.storage.datatypes import ( + ServerStoresConfig, + SqliteSqlStoreConfig, + SqlStoreReference, + StorageConfig, +) +from llama_stack.providers.utils.sqlstore.sqlstore import register_sqlstore_backends @pytest.fixture @@ -28,7 +35,18 @@ async def service(): with tempfile.TemporaryDirectory() as tmpdir: db_path = Path(tmpdir) / "test_conversations.db" - config = ConversationServiceConfig(conversations_store=SqliteSqlStoreConfig(db_path=str(db_path)), policy=[]) + storage = StorageConfig( + backends={ + "sql_test": SqliteSqlStoreConfig(db_path=str(db_path)), + }, + stores=ServerStoresConfig( + conversations=SqlStoreReference(backend="sql_test", table_name="openai_conversations"), + ), + ) + register_sqlstore_backends({"sql_test": storage.backends["sql_test"]}) + run_config = StackRunConfig(image_name="test", apis=[], providers={}, storage=storage) + + config = ConversationServiceConfig(run_config=run_config, policy=[]) service = 
ConversationServiceImpl(config, {})
         await service.initialize()
         yield service


@@ -121,9 +139,18 @@ async def test_policy_configuration():
         AccessRule(forbid=Scope(principal="test_user", actions=[Action.CREATE, Action.READ], resource="*"))
     ]

-    config = ConversationServiceConfig(
-        conversations_store=SqliteSqlStoreConfig(db_path=str(db_path)), policy=restrictive_policy
+    storage = StorageConfig(
+        backends={
+            "sql_test": SqliteSqlStoreConfig(db_path=str(db_path)),
+        },
+        stores=ServerStoresConfig(
+            conversations=SqlStoreReference(backend="sql_test", table_name="openai_conversations"),
+        ),
     )
+    register_sqlstore_backends({"sql_test": storage.backends["sql_test"]})
+    run_config = StackRunConfig(image_name="test", apis=[], providers={}, storage=storage)
+
+    config = ConversationServiceConfig(run_config=run_config, policy=restrictive_policy)
     service = ConversationServiceImpl(config, {})
     await service.initialize()
diff --git a/tests/unit/core/routers/test_vector_io.py b/tests/unit/core/routers/test_vector_io.py
index 997df0d78..dd3246cb3 100644
--- a/tests/unit/core/routers/test_vector_io.py
+++ b/tests/unit/core/routers/test_vector_io.py
@@ -21,7 +21,7 @@ async def test_single_provider_auto_selection():
             Mock(identifier="all-MiniLM-L6-v2", model_type="embedding", metadata={"embedding_dimension": 384})
         ]
     )
-    mock_routing_table.register_vector_db = AsyncMock(
+    mock_routing_table.register_vector_store = AsyncMock(
         return_value=Mock(identifier="vs_123", provider_id="inline::faiss", provider_resource_id="vs_123")
     )
     mock_routing_table.get_provider_impl = AsyncMock(
diff --git a/tests/unit/core/test_stack_validation.py b/tests/unit/core/test_stack_validation.py
index 5fc27e199..fa5348d1c 100644
--- a/tests/unit/core/test_stack_validation.py
+++ b/tests/unit/core/test_stack_validation.py
@@ -4,90 +4,64 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

-"""
-Unit tests for Stack validation functions.
-"""
+"""Unit tests for Stack validation functions."""

 from unittest.mock import AsyncMock

 import pytest

-from llama_stack.apis.models import Model, ModelType
-from llama_stack.core.stack import validate_default_embedding_model
+from llama_stack.apis.models import ListModelsResponse, Model, ModelType
+from llama_stack.core.datatypes import QualifiedModel, StackRunConfig, StorageConfig, VectorStoresConfig
+from llama_stack.core.stack import validate_vector_stores_config
 from llama_stack.providers.datatypes import Api


-class TestStackValidation:
-    """Test Stack validation functions."""
+class TestVectorStoresValidation:
+    async def test_validate_missing_model(self):
+        """Test validation fails when model not found."""
+        run_config = StackRunConfig(
+            image_name="test",
+            providers={},
+            storage=StorageConfig(backends={}, stores={}),
+            vector_stores=VectorStoresConfig(
+                default_provider_id="faiss",
+                default_embedding_model=QualifiedModel(
+                    provider_id="p",
+                    model_id="missing",
+                ),
+            ),
+        )
+        mock_models = AsyncMock()
+        mock_models.list_models.return_value = ListModelsResponse(data=[])

-    @pytest.mark.parametrize(
-        "models,should_raise",
-        [
-            ([], False),  # No models
-            (
-                [
-                    Model(
-                        identifier="emb1",
-                        model_type=ModelType.embedding,
-                        metadata={"default_configured": True},
-                        provider_id="p",
-                        provider_resource_id="emb1",
-                    )
-                ],
-                False,
-            ),  # Single default
-            (
-                [
-                    Model(
-                        identifier="emb1",
-                        model_type=ModelType.embedding,
-                        metadata={"default_configured": True},
-                        provider_id="p",
-                        provider_resource_id="emb1",
-                    ),
-                    Model(
-                        identifier="emb2",
-                        model_type=ModelType.embedding,
-                        metadata={"default_configured": True},
-                        provider_id="p",
-                        provider_resource_id="emb2",
-                    ),
-                ],
-                True,
-            ),  # Multiple defaults
-            (
-                [
-                    Model(
-                        identifier="emb1",
-                        model_type=ModelType.embedding,
-                        metadata={"default_configured": True},
-                        provider_id="p",
-                        provider_resource_id="emb1",
-                    ),
-                    Model(
-                        identifier="llm1",
-                        model_type=ModelType.llm,
-                        metadata={"default_configured": True},
-                        provider_id="p",
-                        provider_resource_id="llm1",
-                    ),
-                ],
-                False,
-            ),  # Ignores non-embedding
-        ],
-    )
-    async def test_validate_default_embedding_model(self, models, should_raise):
-        """Test validation with various model configurations."""
-        mock_models_impl = AsyncMock()
-        mock_models_impl.list_models.return_value = models
-        impls = {Api.models: mock_models_impl}
+        with pytest.raises(ValueError, match="not found"):
+            await validate_vector_stores_config(run_config.vector_stores, {Api.models: mock_models})

-        if should_raise:
-            with pytest.raises(ValueError, match="Multiple embedding models marked as default_configured=True"):
-                await validate_default_embedding_model(impls)
-        else:
-            await validate_default_embedding_model(impls)
+    async def test_validate_success(self):
+        """Test validation passes with valid model."""
+        run_config = StackRunConfig(
+            image_name="test",
+            providers={},
+            storage=StorageConfig(backends={}, stores={}),
+            vector_stores=VectorStoresConfig(
+                default_provider_id="faiss",
+                default_embedding_model=QualifiedModel(
+                    provider_id="p",
+                    model_id="valid",
+                ),
+            ),
+        )
+        mock_models = AsyncMock()
+        mock_models.list_models.return_value = ListModelsResponse(
+            data=[
+                Model(
+                    identifier="p/valid",  # Must match provider_id/model_id format
+                    model_type=ModelType.embedding,
+                    metadata={"embedding_dimension": 768},
+                    provider_id="p",
+                    provider_resource_id="valid",
+                )
+            ]
+        )

-    async def test_validate_default_embedding_model_no_models_api(self):
-        """Test validation when models API is not available."""
-        await validate_default_embedding_model({})
+        await validate_vector_stores_config(run_config.vector_stores, {Api.models: mock_models})
diff --git a/tests/unit/core/test_storage_references.py b/tests/unit/core/test_storage_references.py
new file mode 100644
index 000000000..7bceba74d
--- /dev/null
+++ b/tests/unit/core/test_storage_references.py
@@ -0,0 +1,84 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+"""Unit tests for storage backend/reference validation."""
+
+import pytest
+from pydantic import ValidationError
+
+from llama_stack.core.datatypes import (
+    LLAMA_STACK_RUN_CONFIG_VERSION,
+    StackRunConfig,
+)
+from llama_stack.core.storage.datatypes import (
+    InferenceStoreReference,
+    KVStoreReference,
+    ServerStoresConfig,
+    SqliteKVStoreConfig,
+    SqliteSqlStoreConfig,
+    SqlStoreReference,
+    StorageConfig,
+)
+
+
+def _base_run_config(**overrides):
+    metadata_reference = overrides.pop(
+        "metadata_reference",
+        KVStoreReference(backend="kv_default", namespace="registry"),
+    )
+    inference_reference = overrides.pop(
+        "inference_reference",
+        InferenceStoreReference(backend="sql_default", table_name="inference"),
+    )
+    conversations_reference = overrides.pop(
+        "conversations_reference",
+        SqlStoreReference(backend="sql_default", table_name="conversations"),
+    )
+    storage = overrides.pop(
+        "storage",
+        StorageConfig(
+            backends={
+                "kv_default": SqliteKVStoreConfig(db_path="/tmp/kv.db"),
+                "sql_default": SqliteSqlStoreConfig(db_path="/tmp/sql.db"),
+            },
+            stores=ServerStoresConfig(
+                metadata=metadata_reference,
+                inference=inference_reference,
+                conversations=conversations_reference,
+            ),
+        ),
+    )
+    return StackRunConfig(
+        version=LLAMA_STACK_RUN_CONFIG_VERSION,
+        image_name="test-distro",
+        apis=[],
+        providers={},
+        storage=storage,
+        **overrides,
+    )
+
+
+def test_references_require_known_backend():
+    with pytest.raises(ValidationError, match="unknown backend 'missing'"):
+        _base_run_config(metadata_reference=KVStoreReference(backend="missing", namespace="registry"))
+
+
+def test_references_must_match_backend_family():
+    with pytest.raises(ValidationError, match="kv_.* is required"):
+        _base_run_config(metadata_reference=KVStoreReference(backend="sql_default", namespace="registry"))
+
+    with pytest.raises(ValidationError, match="sql_.* is required"):
+        _base_run_config(
+            inference_reference=InferenceStoreReference(backend="kv_default", table_name="inference"),
+        )
+
+
+def test_valid_configuration_passes_validation():
+    config = _base_run_config()
+    stores = config.storage.stores
+    assert stores.metadata is not None and stores.metadata.backend == "kv_default"
+    assert stores.inference is not None and stores.inference.backend == "sql_default"
+    assert stores.conversations is not None and stores.conversations.backend == "sql_default"
diff --git a/tests/unit/distribution/test_build_path.py b/tests/unit/distribution/test_build_path.py
deleted file mode 100644
index 52a71286b..000000000
--- a/tests/unit/distribution/test_build_path.py
+++ /dev/null
@@ -1,40 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
- -from pathlib import Path - -from llama_stack.cli.stack._build import ( - _run_stack_build_command_from_build_config, -) -from llama_stack.core.datatypes import BuildConfig, DistributionSpec -from llama_stack.core.utils.image_types import LlamaStackImageType - - -def test_container_build_passes_path(monkeypatch, tmp_path): - called_with = {} - - def spy_build_image(build_config, image_name, distro_or_config, run_config=None): - called_with["path"] = distro_or_config - called_with["run_config"] = run_config - return 0 - - monkeypatch.setattr( - "llama_stack.cli.stack._build.build_image", - spy_build_image, - raising=True, - ) - - cfg = BuildConfig( - image_type=LlamaStackImageType.CONTAINER.value, - distribution_spec=DistributionSpec(providers={}, description=""), - ) - - _run_stack_build_command_from_build_config(cfg, image_name="dummy") - - assert "path" in called_with - assert isinstance(called_with["path"], str) - assert Path(called_with["path"]).exists() - assert called_with["run_config"] is None diff --git a/tests/unit/distribution/test_distribution.py b/tests/unit/distribution/test_distribution.py index 08a376008..3b0643a13 100644 --- a/tests/unit/distribution/test_distribution.py +++ b/tests/unit/distribution/test_distribution.py @@ -13,6 +13,15 @@ from pydantic import BaseModel, Field, ValidationError from llama_stack.core.datatypes import Api, Provider, StackRunConfig from llama_stack.core.distribution import INTERNAL_APIS, get_provider_registry, providable_apis +from llama_stack.core.storage.datatypes import ( + InferenceStoreReference, + KVStoreReference, + ServerStoresConfig, + SqliteKVStoreConfig, + SqliteSqlStoreConfig, + SqlStoreReference, + StorageConfig, +) from llama_stack.providers.datatypes import ProviderSpec @@ -29,6 +38,32 @@ class SampleConfig(BaseModel): } +def _default_storage() -> StorageConfig: + return StorageConfig( + backends={ + "kv_default": SqliteKVStoreConfig(db_path=":memory:"), + "sql_default": SqliteSqlStoreConfig(db_path=":memory:"), + }, + stores=ServerStoresConfig( + metadata=KVStoreReference(backend="kv_default", namespace="registry"), + inference=InferenceStoreReference(backend="sql_default", table_name="inference_store"), + conversations=SqlStoreReference(backend="sql_default", table_name="conversations"), + ), + ) + + +def make_stack_config(**overrides) -> StackRunConfig: + storage = overrides.pop("storage", _default_storage()) + defaults = dict( + image_name="test_image", + apis=[], + providers={}, + storage=storage, + ) + defaults.update(overrides) + return StackRunConfig(**defaults) + + @pytest.fixture def mock_providers(): """Mock the available_providers function to return test providers.""" @@ -47,8 +82,8 @@ def mock_providers(): @pytest.fixture def base_config(tmp_path): """Create a base StackRunConfig with common settings.""" - return StackRunConfig( - image_name="test_image", + return make_stack_config( + apis=["inference"], providers={ "inference": [ Provider( @@ -222,8 +257,8 @@ class TestProviderRegistry: def test_missing_directory(self, mock_providers): """Test handling of missing external providers directory.""" - config = StackRunConfig( - image_name="test_image", + config = make_stack_config( + apis=["inference"], providers={ "inference": [ Provider( @@ -278,7 +313,6 @@ pip_packages: """Test loading an external provider from a module (success path).""" from types import SimpleNamespace - from llama_stack.core.datatypes import Provider, StackRunConfig from llama_stack.providers.datatypes import Api, ProviderSpec # Simulate a 
provider module with get_provider_spec @@ -293,7 +327,7 @@ pip_packages: import_module_side_effect = make_import_module_side_effect(external_module=fake_module) with patch("importlib.import_module", side_effect=import_module_side_effect) as mock_import: - config = StackRunConfig( + config = make_stack_config( image_name="test_image", providers={ "inference": [ @@ -317,12 +351,11 @@ pip_packages: def test_external_provider_from_module_not_found(self, mock_providers): """Test handling ModuleNotFoundError for missing provider module.""" - from llama_stack.core.datatypes import Provider, StackRunConfig import_module_side_effect = make_import_module_side_effect(raise_for_external=True) with patch("importlib.import_module", side_effect=import_module_side_effect): - config = StackRunConfig( + config = make_stack_config( image_name="test_image", providers={ "inference": [ @@ -341,12 +374,11 @@ pip_packages: def test_external_provider_from_module_missing_get_provider_spec(self, mock_providers): """Test handling missing get_provider_spec in provider module (should raise ValueError).""" - from llama_stack.core.datatypes import Provider, StackRunConfig import_module_side_effect = make_import_module_side_effect(missing_get_provider_spec=True) with patch("importlib.import_module", side_effect=import_module_side_effect): - config = StackRunConfig( + config = make_stack_config( image_name="test_image", providers={ "inference": [ @@ -399,13 +431,12 @@ class TestGetExternalProvidersFromModule: def test_stackrunconfig_provider_without_module(self, mock_providers): """Test that providers without module attribute are skipped.""" - from llama_stack.core.datatypes import Provider, StackRunConfig from llama_stack.core.distribution import get_external_providers_from_module import_module_side_effect = make_import_module_side_effect() with patch("importlib.import_module", side_effect=import_module_side_effect): - config = StackRunConfig( + config = make_stack_config( image_name="test_image", providers={ "inference": [ @@ -426,7 +457,6 @@ class TestGetExternalProvidersFromModule: """Test provider with module containing version spec (e.g., package==1.0.0).""" from types import SimpleNamespace - from llama_stack.core.datatypes import Provider, StackRunConfig from llama_stack.core.distribution import get_external_providers_from_module from llama_stack.providers.datatypes import ProviderSpec @@ -444,7 +474,7 @@ class TestGetExternalProvidersFromModule: raise ModuleNotFoundError(name) with patch("importlib.import_module", side_effect=import_side_effect): - config = StackRunConfig( + config = make_stack_config( image_name="test_image", providers={ "inference": [ @@ -564,7 +594,6 @@ class TestGetExternalProvidersFromModule: """Test when get_provider_spec returns a list of specs.""" from types import SimpleNamespace - from llama_stack.core.datatypes import Provider, StackRunConfig from llama_stack.core.distribution import get_external_providers_from_module from llama_stack.providers.datatypes import ProviderSpec @@ -589,7 +618,7 @@ class TestGetExternalProvidersFromModule: raise ModuleNotFoundError(name) with patch("importlib.import_module", side_effect=import_side_effect): - config = StackRunConfig( + config = make_stack_config( image_name="test_image", providers={ "inference": [ @@ -613,7 +642,6 @@ class TestGetExternalProvidersFromModule: """Test that list return filters specs by provider_type.""" from types import SimpleNamespace - from llama_stack.core.datatypes import Provider, StackRunConfig from 
llama_stack.core.distribution import get_external_providers_from_module from llama_stack.providers.datatypes import ProviderSpec @@ -638,7 +666,7 @@ class TestGetExternalProvidersFromModule: raise ModuleNotFoundError(name) with patch("importlib.import_module", side_effect=import_side_effect): - config = StackRunConfig( + config = make_stack_config( image_name="test_image", providers={ "inference": [ @@ -662,7 +690,6 @@ class TestGetExternalProvidersFromModule: """Test that list return adds multiple different provider_types when config requests them.""" from types import SimpleNamespace - from llama_stack.core.datatypes import Provider, StackRunConfig from llama_stack.core.distribution import get_external_providers_from_module from llama_stack.providers.datatypes import ProviderSpec @@ -688,7 +715,7 @@ class TestGetExternalProvidersFromModule: raise ModuleNotFoundError(name) with patch("importlib.import_module", side_effect=import_side_effect): - config = StackRunConfig( + config = make_stack_config( image_name="test_image", providers={ "inference": [ @@ -718,7 +745,6 @@ class TestGetExternalProvidersFromModule: def test_module_not_found_raises_value_error(self, mock_providers): """Test that ModuleNotFoundError raises ValueError with helpful message.""" - from llama_stack.core.datatypes import Provider, StackRunConfig from llama_stack.core.distribution import get_external_providers_from_module def import_side_effect(name): @@ -727,7 +753,7 @@ class TestGetExternalProvidersFromModule: raise ModuleNotFoundError(name) with patch("importlib.import_module", side_effect=import_side_effect): - config = StackRunConfig( + config = make_stack_config( image_name="test_image", providers={ "inference": [ @@ -751,7 +777,6 @@ class TestGetExternalProvidersFromModule: """Test that generic exceptions are properly raised.""" from types import SimpleNamespace - from llama_stack.core.datatypes import Provider, StackRunConfig from llama_stack.core.distribution import get_external_providers_from_module def bad_spec(): @@ -765,7 +790,7 @@ class TestGetExternalProvidersFromModule: raise ModuleNotFoundError(name) with patch("importlib.import_module", side_effect=import_side_effect): - config = StackRunConfig( + config = make_stack_config( image_name="test_image", providers={ "inference": [ @@ -787,10 +812,9 @@ class TestGetExternalProvidersFromModule: def test_empty_provider_list(self, mock_providers): """Test with empty provider list.""" - from llama_stack.core.datatypes import StackRunConfig from llama_stack.core.distribution import get_external_providers_from_module - config = StackRunConfig( + config = make_stack_config( image_name="test_image", providers={}, ) @@ -805,7 +829,6 @@ class TestGetExternalProvidersFromModule: """Test multiple APIs with providers.""" from types import SimpleNamespace - from llama_stack.core.datatypes import Provider, StackRunConfig from llama_stack.core.distribution import get_external_providers_from_module from llama_stack.providers.datatypes import ProviderSpec @@ -830,7 +853,7 @@ class TestGetExternalProvidersFromModule: raise ModuleNotFoundError(name) with patch("importlib.import_module", side_effect=import_side_effect): - config = StackRunConfig( + config = make_stack_config( image_name="test_image", providers={ "inference": [ diff --git a/tests/unit/files/test_files.py b/tests/unit/files/test_files.py index e14e033b9..426e2cf64 100644 --- a/tests/unit/files/test_files.py +++ b/tests/unit/files/test_files.py @@ -11,11 +11,12 @@ from llama_stack.apis.common.errors import 
ResourceNotFoundError from llama_stack.apis.common.responses import Order from llama_stack.apis.files import OpenAIFilePurpose from llama_stack.core.access_control.access_control import default_policy +from llama_stack.core.storage.datatypes import SqliteSqlStoreConfig, SqlStoreReference from llama_stack.providers.inline.files.localfs import ( LocalfsFilesImpl, LocalfsFilesImplConfig, ) -from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig +from llama_stack.providers.utils.sqlstore.sqlstore import register_sqlstore_backends class MockUploadFile: @@ -36,8 +37,11 @@ async def files_provider(tmp_path): storage_dir = tmp_path / "files" db_path = tmp_path / "files_metadata.db" + backend_name = "sql_localfs_test" + register_sqlstore_backends({backend_name: SqliteSqlStoreConfig(db_path=db_path.as_posix())}) config = LocalfsFilesImplConfig( - storage_dir=storage_dir.as_posix(), metadata_store=SqliteSqlStoreConfig(db_path=db_path.as_posix()) + storage_dir=storage_dir.as_posix(), + metadata_store=SqlStoreReference(backend=backend_name, table_name="files_metadata"), ) provider = LocalfsFilesImpl(config, default_policy()) diff --git a/tests/unit/prompts/prompts/conftest.py b/tests/unit/prompts/prompts/conftest.py index b2c619e49..fe30e1a77 100644 --- a/tests/unit/prompts/prompts/conftest.py +++ b/tests/unit/prompts/prompts/conftest.py @@ -9,7 +9,16 @@ import random import pytest from llama_stack.core.prompts.prompts import PromptServiceConfig, PromptServiceImpl -from llama_stack.providers.utils.kvstore.config import SqliteKVStoreConfig +from llama_stack.core.storage.datatypes import ( + InferenceStoreReference, + KVStoreReference, + ServerStoresConfig, + SqliteKVStoreConfig, + SqliteSqlStoreConfig, + SqlStoreReference, + StorageConfig, +) +from llama_stack.providers.utils.kvstore import kvstore_impl, register_kvstore_backends @pytest.fixture @@ -19,12 +28,28 @@ async def temp_prompt_store(tmp_path_factory): db_path = str(temp_dir / f"{unique_id}.db") from llama_stack.core.datatypes import StackRunConfig - from llama_stack.providers.utils.kvstore import kvstore_impl - mock_run_config = StackRunConfig(image_name="test-distribution", apis=[], providers={}) + storage = StorageConfig( + backends={ + "kv_test": SqliteKVStoreConfig(db_path=db_path), + "sql_test": SqliteSqlStoreConfig(db_path=str(temp_dir / f"{unique_id}_sql.db")), + }, + stores=ServerStoresConfig( + metadata=KVStoreReference(backend="kv_test", namespace="registry"), + inference=InferenceStoreReference(backend="sql_test", table_name="inference"), + conversations=SqlStoreReference(backend="sql_test", table_name="conversations"), + ), + ) + mock_run_config = StackRunConfig( + image_name="test-distribution", + apis=[], + providers={}, + storage=storage, + ) config = PromptServiceConfig(run_config=mock_run_config) store = PromptServiceImpl(config, deps={}) - store.kvstore = await kvstore_impl(SqliteKVStoreConfig(db_path=db_path)) + register_kvstore_backends({"kv_test": storage.backends["kv_test"]}) + store.kvstore = await kvstore_impl(KVStoreReference(backend="kv_test", namespace="prompts")) yield store diff --git a/tests/unit/providers/agent/test_meta_reference_agent.py b/tests/unit/providers/agent/test_meta_reference_agent.py index cfb3e1327..dfd9b6d52 100644 --- a/tests/unit/providers/agent/test_meta_reference_agent.py +++ b/tests/unit/providers/agent/test_meta_reference_agent.py @@ -26,6 +26,20 @@ from llama_stack.providers.inline.agents.meta_reference.config import MetaRefere from 
llama_stack.providers.inline.agents.meta_reference.persistence import AgentInfo


+@pytest.fixture(autouse=True)
+def setup_backends(tmp_path):
+    """Register KV and SQL store backends for testing."""
+    from llama_stack.core.storage.datatypes import SqliteKVStoreConfig, SqliteSqlStoreConfig
+    from llama_stack.providers.utils.kvstore.kvstore import register_kvstore_backends
+    from llama_stack.providers.utils.sqlstore.sqlstore import register_sqlstore_backends
+
+    kv_path = str(tmp_path / "test_kv.db")
+    sql_path = str(tmp_path / "test_sql.db")
+
+    register_kvstore_backends({"kv_default": SqliteKVStoreConfig(db_path=kv_path)})
+    register_sqlstore_backends({"sql_default": SqliteSqlStoreConfig(db_path=sql_path)})
+
+
 @pytest.fixture
 def mock_apis():
     return {
@@ -40,15 +54,20 @@ def mock_apis():

 @pytest.fixture
 def config(tmp_path):
+    from llama_stack.core.storage.datatypes import KVStoreReference, ResponsesStoreReference
+    from llama_stack.providers.inline.agents.meta_reference.config import AgentPersistenceConfig
+
     return MetaReferenceAgentsImplConfig(
-        persistence_store={
-            "type": "sqlite",
-            "db_path": str(tmp_path / "test.db"),
-        },
-        responses_store={
-            "type": "sqlite",
-            "db_path": str(tmp_path / "test.db"),
-        },
+        persistence=AgentPersistenceConfig(
+            agent_state=KVStoreReference(
+                backend="kv_default",
+                namespace="agents",
+            ),
+            responses=ResponsesStoreReference(
+                backend="sql_default",
+                table_name="responses",
+            ),
+        )
     )
diff --git a/tests/unit/providers/agents/meta_reference/test_openai_responses.py b/tests/unit/providers/agents/meta_reference/test_openai_responses.py
index e93668a62..f31ec0c28 100644
--- a/tests/unit/providers/agents/meta_reference/test_openai_responses.py
+++ b/tests/unit/providers/agents/meta_reference/test_openai_responses.py
@@ -42,7 +42,7 @@ from llama_stack.apis.inference import (
 )
 from llama_stack.apis.tools.tools import ListToolDefsResponse, ToolDef, ToolGroups, ToolInvocationResult, ToolRuntime
 from llama_stack.core.access_control.access_control import default_policy
-from llama_stack.core.datatypes import ResponsesStoreConfig
+from llama_stack.core.storage.datatypes import ResponsesStoreReference, SqliteSqlStoreConfig
 from llama_stack.providers.inline.agents.meta_reference.responses.openai_responses import (
     OpenAIResponsesImpl,
 )
@@ -50,7 +50,7 @@ from llama_stack.providers.utils.responses.responses_store import (
     ResponsesStore,
     _OpenAIResponseObjectWithInputAndMessages,
 )
-from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig
+from llama_stack.providers.utils.sqlstore.sqlstore import register_sqlstore_backends
 from tests.unit.providers.agents.meta_reference.fixtures import load_chat_completion_fixture

@@ -814,6 +814,69 @@ async def test_create_openai_response_with_instructions_and_previous_response(
     assert sent_messages[3].content == "Which is the largest?"
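+
+# The added test below pins down instruction chaining: instructions passed on
+# the new request become the system message, while instructions stored on the
+# previous response are intentionally not carried forward.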
+async def test_create_openai_response_with_previous_response_instructions(
+    openai_responses_impl, mock_responses_store, mock_inference_api
+):
+    """Test that request instructions are prepended and the previous response's stored instructions are not carried over."""
+
+    input_item_message = OpenAIResponseMessage(
+        id="123",
+        content="Name some towns in Ireland",
+        role="user",
+    )
+    response_output_message = OpenAIResponseMessage(
+        id="123",
+        content="Galway, Longford, Sligo",
+        status="completed",
+        role="assistant",
+    )
+    response = _OpenAIResponseObjectWithInputAndMessages(
+        created_at=1,
+        id="resp_123",
+        model="fake_model",
+        output=[response_output_message],
+        status="completed",
+        text=OpenAIResponseText(format=OpenAIResponseTextFormat(type="text")),
+        input=[input_item_message],
+        messages=[
+            OpenAIUserMessageParam(content="Name some towns in Ireland"),
+            OpenAIAssistantMessageParam(content="Galway, Longford, Sligo"),
+        ],
+        instructions="You are a helpful assistant.",
+    )
+    mock_responses_store.get_response_object.return_value = response
+
+    model = "meta-llama/Llama-3.1-8B-Instruct"
+    instructions = "You are a geography expert. Provide concise answers."
+
+    mock_inference_api.openai_chat_completion.return_value = fake_stream()
+
+    # Execute
+    await openai_responses_impl.create_openai_response(
+        input="Which is the largest?", model=model, instructions=instructions, previous_response_id="123"
+    )
+
+    # Verify
+    mock_inference_api.openai_chat_completion.assert_called_once()
+    call_args = mock_inference_api.openai_chat_completion.call_args
+    params = call_args.args[0]
+    sent_messages = params.messages
+
+    # Check that instructions were prepended as a system message
+    # and that the previous response instructions were not carried over
+    assert len(sent_messages) == 4, sent_messages
+    assert sent_messages[0].role == "system"
+    assert sent_messages[0].content == instructions
+
+    # Check the rest of the messages were converted correctly
+    assert sent_messages[1].role == "user"
+    assert sent_messages[1].content == "Name some towns in Ireland"
+    assert sent_messages[2].role == "assistant"
+    assert sent_messages[2].content == "Galway, Longford, Sligo"
+    assert sent_messages[3].role == "user"
+    assert sent_messages[3].content == "Which is the largest?"
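+
+    # Count recap for the assertion above: 1 system message (new instructions)
+    # + 2 replayed turns from the previous response + 1 new user input = 4;
+    # the previous response's own instructions contribute no message.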
+
+
 async def test_list_openai_response_input_items_delegation(openai_responses_impl, mock_responses_store):
     """Test that list_openai_response_input_items properly delegates to responses_store with correct parameters."""
     # Setup
@@ -854,8 +917,10 @@ async def test_responses_store_list_input_items_logic():

     # Create mock store and response store
     mock_sql_store = AsyncMock()
+    backend_name = "sql_responses_test"
+    register_sqlstore_backends({backend_name: SqliteSqlStoreConfig(db_path="mock_db_path")})
     responses_store = ResponsesStore(
-        ResponsesStoreConfig(sql_store_config=SqliteSqlStoreConfig(db_path="mock_db_path")), policy=default_policy()
+        ResponsesStoreReference(backend=backend_name, table_name="responses"), policy=default_policy()
     )
     responses_store.sql_store = mock_sql_store
diff --git a/tests/unit/providers/batches/conftest.py b/tests/unit/providers/batches/conftest.py
index df37141b5..d161bf976 100644
--- a/tests/unit/providers/batches/conftest.py
+++ b/tests/unit/providers/batches/conftest.py
@@ -12,10 +12,10 @@ from unittest.mock import AsyncMock

 import pytest

+from llama_stack.core.storage.datatypes import KVStoreReference, SqliteKVStoreConfig
 from llama_stack.providers.inline.batches.reference.batches import ReferenceBatchesImpl
 from llama_stack.providers.inline.batches.reference.config import ReferenceBatchesImplConfig
-from llama_stack.providers.utils.kvstore import kvstore_impl
-from llama_stack.providers.utils.kvstore.config import SqliteKVStoreConfig
+from llama_stack.providers.utils.kvstore import kvstore_impl, register_kvstore_backends


 @pytest.fixture
@@ -23,8 +23,10 @@ async def provider():
     """Create a test provider instance with temporary database."""
     with tempfile.TemporaryDirectory() as tmpdir:
         db_path = Path(tmpdir) / "test_batches.db"
+        backend_name = "kv_batches_test"
         kvstore_config = SqliteKVStoreConfig(db_path=str(db_path))
-        config = ReferenceBatchesImplConfig(kvstore=kvstore_config)
+        register_kvstore_backends({backend_name: kvstore_config})
+        config = ReferenceBatchesImplConfig(kvstore=KVStoreReference(backend=backend_name, namespace="batches"))

         # Create kvstore and mock APIs
         kvstore = await kvstore_impl(config.kvstore)
diff --git a/tests/unit/providers/files/conftest.py b/tests/unit/providers/files/conftest.py
index 46282e3dc..c64ecc3a3 100644
--- a/tests/unit/providers/files/conftest.py
+++ b/tests/unit/providers/files/conftest.py
@@ -8,8 +8,9 @@ import boto3
 import pytest
 from moto import mock_aws

+from llama_stack.core.storage.datatypes import SqliteSqlStoreConfig, SqlStoreReference
 from llama_stack.providers.remote.files.s3 import S3FilesImplConfig, get_adapter_impl
-from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig
+from llama_stack.providers.utils.sqlstore.sqlstore import register_sqlstore_backends


 class MockUploadFile:
@@ -38,11 +39,13 @@ def sample_text_file2():
 def s3_config(tmp_path):
     db_path = tmp_path / "s3_files_metadata.db"
+    backend_name = f"sql_s3_{tmp_path.name}"
+    register_sqlstore_backends({backend_name: SqliteSqlStoreConfig(db_path=db_path.as_posix())})
     return S3FilesImplConfig(
         bucket_name=f"test-bucket-{tmp_path.name}",
         region="not-a-region",
         auto_create_bucket=True,
-        metadata_store=SqliteSqlStoreConfig(db_path=db_path.as_posix()),
+        metadata_store=SqlStoreReference(backend=backend_name, table_name="s3_files_metadata"),
     )
diff --git a/tests/unit/providers/utils/memory/test_vector_store.py b/tests/unit/providers/utils/memory/test_vector_store.py
index 590bdd1d2..3a5cd5bf7 100644
---
a/tests/unit/providers/utils/memory/test_vector_store.py +++ b/tests/unit/providers/utils/memory/test_vector_store.py @@ -4,138 +4,11 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from unittest.mock import AsyncMock, MagicMock, patch +from unittest.mock import patch import pytest -from llama_stack.apis.common.content_types import URL, TextContentItem -from llama_stack.apis.tools import RAGDocument -from llama_stack.providers.utils.memory.vector_store import content_from_data_and_mime_type, content_from_doc - - -async def test_content_from_doc_with_url(): - """Test extracting content from RAGDocument with URL content.""" - mock_url = URL(uri="https://example.com") - mock_doc = RAGDocument(document_id="foo", content=mock_url) - - mock_response = MagicMock() - mock_response.text = "Sample content from URL" - - with patch("httpx.AsyncClient") as mock_client: - mock_instance = AsyncMock() - mock_instance.get.return_value = mock_response - mock_client.return_value.__aenter__.return_value = mock_instance - - result = await content_from_doc(mock_doc) - - assert result == "Sample content from URL" - mock_instance.get.assert_called_once_with(mock_url.uri) - - -async def test_content_from_doc_with_pdf_url(): - """Test extracting content from RAGDocument with URL pointing to a PDF.""" - mock_url = URL(uri="https://example.com/document.pdf") - mock_doc = RAGDocument(document_id="foo", content=mock_url, mime_type="application/pdf") - - mock_response = MagicMock() - mock_response.content = b"PDF binary data" - - with ( - patch("httpx.AsyncClient") as mock_client, - patch("llama_stack.providers.utils.memory.vector_store.parse_pdf") as mock_parse_pdf, - ): - mock_instance = AsyncMock() - mock_instance.get.return_value = mock_response - mock_client.return_value.__aenter__.return_value = mock_instance - mock_parse_pdf.return_value = "Extracted PDF content" - - result = await content_from_doc(mock_doc) - - assert result == "Extracted PDF content" - mock_instance.get.assert_called_once_with(mock_url.uri) - mock_parse_pdf.assert_called_once_with(b"PDF binary data") - - -async def test_content_from_doc_with_data_url(): - """Test extracting content from RAGDocument with data URL content.""" - data_url = "data:text/plain;base64,SGVsbG8gV29ybGQ=" # "Hello World" base64 encoded - mock_url = URL(uri=data_url) - mock_doc = RAGDocument(document_id="foo", content=mock_url) - - with patch("llama_stack.providers.utils.memory.vector_store.content_from_data") as mock_content_from_data: - mock_content_from_data.return_value = "Hello World" - - result = await content_from_doc(mock_doc) - - assert result == "Hello World" - mock_content_from_data.assert_called_once_with(data_url) - - -async def test_content_from_doc_with_string(): - """Test extracting content from RAGDocument with string content.""" - content_string = "This is plain text content" - mock_doc = RAGDocument(document_id="foo", content=content_string) - - result = await content_from_doc(mock_doc) - - assert result == content_string - - -async def test_content_from_doc_with_string_url(): - """Test extracting content from RAGDocument with string URL content.""" - url_string = "https://example.com" - mock_doc = RAGDocument(document_id="foo", content=url_string) - - mock_response = MagicMock() - mock_response.text = "Sample content from URL string" - - with patch("httpx.AsyncClient") as mock_client: - mock_instance = AsyncMock() - mock_instance.get.return_value = mock_response - 
mock_client.return_value.__aenter__.return_value = mock_instance - - result = await content_from_doc(mock_doc) - - assert result == "Sample content from URL string" - mock_instance.get.assert_called_once_with(url_string) - - -async def test_content_from_doc_with_string_pdf_url(): - """Test extracting content from RAGDocument with string URL pointing to a PDF.""" - url_string = "https://example.com/document.pdf" - mock_doc = RAGDocument(document_id="foo", content=url_string, mime_type="application/pdf") - - mock_response = MagicMock() - mock_response.content = b"PDF binary data" - - with ( - patch("httpx.AsyncClient") as mock_client, - patch("llama_stack.providers.utils.memory.vector_store.parse_pdf") as mock_parse_pdf, - ): - mock_instance = AsyncMock() - mock_instance.get.return_value = mock_response - mock_client.return_value.__aenter__.return_value = mock_instance - mock_parse_pdf.return_value = "Extracted PDF content from string URL" - - result = await content_from_doc(mock_doc) - - assert result == "Extracted PDF content from string URL" - mock_instance.get.assert_called_once_with(url_string) - mock_parse_pdf.assert_called_once_with(b"PDF binary data") - - -async def test_content_from_doc_with_interleaved_content(): - """Test extracting content from RAGDocument with InterleavedContent (the new case added in the commit).""" - interleaved_content = [TextContentItem(text="First item"), TextContentItem(text="Second item")] - mock_doc = RAGDocument(document_id="foo", content=interleaved_content) - - with patch("llama_stack.providers.utils.memory.vector_store.interleaved_content_as_str") as mock_interleaved: - mock_interleaved.return_value = "First item\nSecond item" - - result = await content_from_doc(mock_doc) - - assert result == "First item\nSecond item" - mock_interleaved.assert_called_once_with(interleaved_content) +from llama_stack.providers.utils.memory.vector_store import content_from_data_and_mime_type def test_content_from_data_and_mime_type_success_utf8(): @@ -178,41 +51,3 @@ def test_content_from_data_and_mime_type_both_encodings_fail(): # Should raise an exception instead of returning empty string with pytest.raises(UnicodeDecodeError): content_from_data_and_mime_type(data, mime_type) - - -async def test_memory_tool_error_handling(): - """Test that memory tool handles various failures gracefully without crashing.""" - from llama_stack.providers.inline.tool_runtime.rag.config import RagToolRuntimeConfig - from llama_stack.providers.inline.tool_runtime.rag.memory import MemoryToolRuntimeImpl - - config = RagToolRuntimeConfig() - memory_tool = MemoryToolRuntimeImpl( - config=config, - vector_io_api=AsyncMock(), - inference_api=AsyncMock(), - files_api=AsyncMock(), - ) - - docs = [ - RAGDocument(document_id="good_doc", content="Good content", metadata={}), - RAGDocument(document_id="bad_url_doc", content=URL(uri="https://bad.url"), metadata={}), - RAGDocument(document_id="another_good_doc", content="Another good content", metadata={}), - ] - - mock_file1 = MagicMock() - mock_file1.id = "file_good1" - mock_file2 = MagicMock() - mock_file2.id = "file_good2" - memory_tool.files_api.openai_upload_file.side_effect = [mock_file1, mock_file2] - - with patch("httpx.AsyncClient") as mock_client: - mock_instance = AsyncMock() - mock_instance.get.side_effect = Exception("Bad URL") - mock_client.return_value.__aenter__.return_value = mock_instance - - # won't raise exception despite one document failing - await memory_tool.insert(docs, "vector_store_123") - - # processed 2 documents 
successfully, skipped 1 - assert memory_tool.files_api.openai_upload_file.call_count == 2 - assert memory_tool.vector_io_api.openai_attach_file_to_vector_store.call_count == 2 diff --git a/tests/unit/providers/vector_io/conftest.py b/tests/unit/providers/vector_io/conftest.py index 8e5c85cf1..2951ca2e5 100644 --- a/tests/unit/providers/vector_io/conftest.py +++ b/tests/unit/providers/vector_io/conftest.py @@ -10,15 +10,16 @@ from unittest.mock import AsyncMock, MagicMock, patch import numpy as np import pytest -from llama_stack.apis.vector_dbs import VectorDB from llama_stack.apis.vector_io import Chunk, ChunkMetadata, QueryChunksResponse +from llama_stack.apis.vector_stores import VectorStore +from llama_stack.core.storage.datatypes import KVStoreReference, SqliteKVStoreConfig from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig from llama_stack.providers.inline.vector_io.faiss.faiss import FaissIndex, FaissVectorIOAdapter from llama_stack.providers.inline.vector_io.sqlite_vec import SQLiteVectorIOConfig from llama_stack.providers.inline.vector_io.sqlite_vec.sqlite_vec import SQLiteVecIndex, SQLiteVecVectorIOAdapter from llama_stack.providers.remote.vector_io.pgvector.config import PGVectorVectorIOConfig from llama_stack.providers.remote.vector_io.pgvector.pgvector import PGVectorIndex, PGVectorVectorIOAdapter -from llama_stack.providers.utils.kvstore.config import SqliteKVStoreConfig +from llama_stack.providers.utils.kvstore import register_kvstore_backends EMBEDDING_DIMENSION = 768 COLLECTION_PREFIX = "test_collection" @@ -30,7 +31,7 @@ def vector_provider(request): @pytest.fixture -def vector_db_id() -> str: +def vector_store_id() -> str: return f"test-vector-db-{random.randint(1, 100)}" @@ -112,8 +113,9 @@ async def unique_kvstore_config(tmp_path_factory): unique_id = f"test_kv_{np.random.randint(1e6)}" temp_dir = tmp_path_factory.getbasetemp() db_path = str(temp_dir / f"{unique_id}.db") - - return SqliteKVStoreConfig(db_path=db_path) + backend_name = f"kv_vector_{unique_id}" + register_kvstore_backends({backend_name: SqliteKVStoreConfig(db_path=db_path)}) + return KVStoreReference(backend=backend_name, namespace=f"vector_io::{unique_id}") @pytest.fixture(scope="session") @@ -138,18 +140,17 @@ async def sqlite_vec_vec_index(embedding_dimension, tmp_path_factory): async def sqlite_vec_adapter(sqlite_vec_db_path, unique_kvstore_config, mock_inference_api, embedding_dimension): config = SQLiteVectorIOConfig( db_path=sqlite_vec_db_path, - kvstore=unique_kvstore_config, + persistence=unique_kvstore_config, ) adapter = SQLiteVecVectorIOAdapter( config=config, inference_api=mock_inference_api, files_api=None, - models_api=None, ) collection_id = f"sqlite_test_collection_{np.random.randint(1e6)}" await adapter.initialize() - await adapter.register_vector_db( - VectorDB( + await adapter.register_vector_store( + VectorStore( identifier=collection_id, provider_id="test_provider", embedding_model="test_model", @@ -177,17 +178,16 @@ async def faiss_vec_index(embedding_dimension): @pytest.fixture async def faiss_vec_adapter(unique_kvstore_config, mock_inference_api, embedding_dimension): config = FaissVectorIOConfig( - kvstore=unique_kvstore_config, + persistence=unique_kvstore_config, ) adapter = FaissVectorIOAdapter( config=config, inference_api=mock_inference_api, files_api=None, - models_api=None, ) await adapter.initialize() - await adapter.register_vector_db( - VectorDB( + await adapter.register_vector_store( + VectorStore( 
identifier=f"faiss_test_collection_{np.random.randint(1e6)}", provider_id="test_provider", embedding_model="test_model", @@ -215,7 +215,7 @@ def mock_psycopg2_connection(): async def pgvector_vec_index(embedding_dimension, mock_psycopg2_connection): connection, cursor = mock_psycopg2_connection - vector_db = VectorDB( + vector_store = VectorStore( identifier="test-vector-db", embedding_model="test-model", embedding_dimension=embedding_dimension, @@ -225,7 +225,7 @@ async def pgvector_vec_index(embedding_dimension, mock_psycopg2_connection): with patch("llama_stack.providers.remote.vector_io.pgvector.pgvector.psycopg2"): with patch("llama_stack.providers.remote.vector_io.pgvector.pgvector.execute_values"): - index = PGVectorIndex(vector_db, embedding_dimension, connection, distance_metric="COSINE") + index = PGVectorIndex(vector_store, embedding_dimension, connection, distance_metric="COSINE") index._test_chunks = [] original_add_chunks = index.add_chunks @@ -253,7 +253,7 @@ async def pgvector_vec_adapter(unique_kvstore_config, mock_inference_api, embedd db="test_db", user="test_user", password="test_password", - kvstore=unique_kvstore_config, + persistence=unique_kvstore_config, ) adapter = PGVectorVectorIOAdapter(config, mock_inference_api, None) @@ -281,30 +281,30 @@ async def pgvector_vec_adapter(unique_kvstore_config, mock_inference_api, embedd await adapter.initialize() adapter.conn = mock_conn - async def mock_insert_chunks(vector_db_id, chunks, ttl_seconds=None): - index = await adapter._get_and_cache_vector_db_index(vector_db_id) + async def mock_insert_chunks(vector_store_id, chunks, ttl_seconds=None): + index = await adapter._get_and_cache_vector_store_index(vector_store_id) if not index: - raise ValueError(f"Vector DB {vector_db_id} not found") + raise ValueError(f"Vector DB {vector_store_id} not found") await index.insert_chunks(chunks) adapter.insert_chunks = mock_insert_chunks - async def mock_query_chunks(vector_db_id, query, params=None): - index = await adapter._get_and_cache_vector_db_index(vector_db_id) + async def mock_query_chunks(vector_store_id, query, params=None): + index = await adapter._get_and_cache_vector_store_index(vector_store_id) if not index: - raise ValueError(f"Vector DB {vector_db_id} not found") + raise ValueError(f"Vector DB {vector_store_id} not found") return await index.query_chunks(query, params) adapter.query_chunks = mock_query_chunks - test_vector_db = VectorDB( + test_vector_store = VectorStore( identifier=f"pgvector_test_collection_{random.randint(1, 1_000_000)}", provider_id="test_provider", embedding_model="test_model", embedding_dimension=embedding_dimension, ) - await adapter.register_vector_db(test_vector_db) - adapter.test_collection_id = test_vector_db.identifier + await adapter.register_vector_store(test_vector_store) + adapter.test_collection_id = test_vector_store.identifier yield adapter await adapter.shutdown() diff --git a/tests/unit/providers/vector_io/test_faiss.py b/tests/unit/providers/vector_io/test_faiss.py index 76969b711..7b870d16e 100644 --- a/tests/unit/providers/vector_io/test_faiss.py +++ b/tests/unit/providers/vector_io/test_faiss.py @@ -11,9 +11,8 @@ import numpy as np import pytest from llama_stack.apis.files import Files -from llama_stack.apis.models import Models -from llama_stack.apis.vector_dbs import VectorDB from llama_stack.apis.vector_io import Chunk, QueryChunksResponse +from llama_stack.apis.vector_stores import VectorStore from llama_stack.providers.datatypes import HealthStatus from 
llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig from llama_stack.providers.inline.vector_io.faiss.faiss import ( @@ -44,8 +43,8 @@ def embedding_dimension(): @pytest.fixture -def vector_db_id(): - return "test_vector_db" +def vector_store_id(): + return "test_vector_store" @pytest.fixture @@ -62,12 +61,12 @@ def sample_embeddings(embedding_dimension): @pytest.fixture -def mock_vector_db(vector_db_id, embedding_dimension) -> MagicMock: - mock_vector_db = MagicMock(spec=VectorDB) - mock_vector_db.embedding_model = "mock_embedding_model" - mock_vector_db.identifier = vector_db_id - mock_vector_db.embedding_dimension = embedding_dimension - return mock_vector_db +def mock_vector_store(vector_store_id, embedding_dimension) -> MagicMock: + mock_vector_store = MagicMock(spec=VectorStore) + mock_vector_store.embedding_model = "mock_embedding_model" + mock_vector_store.identifier = vector_store_id + mock_vector_store.embedding_dimension = embedding_dimension + return mock_vector_store @pytest.fixture @@ -76,12 +75,6 @@ def mock_files_api(): return mock_api -@pytest.fixture -def mock_models_api(): - mock_api = MagicMock(spec=Models) - return mock_api - - @pytest.fixture def faiss_config(): config = MagicMock(spec=FaissVectorIOConfig) @@ -117,7 +110,7 @@ async def test_faiss_query_vector_returns_infinity_when_query_and_embedding_are_ assert response.chunks[1] == sample_chunks[1] -async def test_health_success(mock_models_api): +async def test_health_success(): """Test that the health check returns OK status when faiss is working correctly.""" # Create a fresh instance of FaissVectorIOAdapter for testing config = MagicMock() @@ -126,9 +119,7 @@ async def test_health_success(mock_models_api): with patch("llama_stack.providers.inline.vector_io.faiss.faiss.faiss.IndexFlatL2") as mock_index_flat: mock_index_flat.return_value = MagicMock() - adapter = FaissVectorIOAdapter( - config=config, inference_api=inference_api, models_api=mock_models_api, files_api=files_api - ) + adapter = FaissVectorIOAdapter(config=config, inference_api=inference_api, files_api=files_api) # Calling the health method directly response = await adapter.health() @@ -142,7 +133,7 @@ async def test_health_success(mock_models_api): mock_index_flat.assert_called_once_with(128) # VECTOR_DIMENSION is 128 -async def test_health_failure(mock_models_api): +async def test_health_failure(): """Test that the health check returns ERROR status when faiss encounters an error.""" # Create a fresh instance of FaissVectorIOAdapter for testing config = MagicMock() @@ -152,9 +143,7 @@ async def test_health_failure(mock_models_api): with patch("llama_stack.providers.inline.vector_io.faiss.faiss.faiss.IndexFlatL2") as mock_index_flat: mock_index_flat.side_effect = Exception("Test error") - adapter = FaissVectorIOAdapter( - config=config, inference_api=inference_api, models_api=mock_models_api, files_api=files_api - ) + adapter = FaissVectorIOAdapter(config=config, inference_api=inference_api, files_api=files_api) # Calling the health method directly response = await adapter.health() diff --git a/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py b/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py index 32d59c91b..65d7b7602 100644 --- a/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py +++ b/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py @@ -6,14 +6,12 @@ import json import time -from unittest.mock import AsyncMock, Mock, patch +from 
unittest.mock import AsyncMock, patch import numpy as np import pytest from llama_stack.apis.common.errors import VectorStoreNotFoundError -from llama_stack.apis.models import Model, ModelType -from llama_stack.apis.vector_dbs import VectorDB from llama_stack.apis.vector_io import ( Chunk, OpenAICreateVectorStoreFileBatchRequestWithExtraBody, @@ -22,6 +20,7 @@ from llama_stack.apis.vector_io import ( VectorStoreChunkingStrategyAuto, VectorStoreFileObject, ) +from llama_stack.apis.vector_stores import VectorStore from llama_stack.providers.inline.vector_io.sqlite_vec.sqlite_vec import VECTOR_DBS_PREFIX # This test is a unit test for the inline VectorIO providers. This should only contain @@ -72,7 +71,7 @@ async def test_chunk_id_conflict(vector_index, sample_chunks, embedding_dimensio async def test_initialize_adapter_with_existing_kvstore(vector_io_adapter): key = f"{VECTOR_DBS_PREFIX}db1" - dummy = VectorDB( + dummy = VectorStore( identifier="foo_db", provider_id="test_provider", embedding_model="test_model", embedding_dimension=128 ) await vector_io_adapter.kvstore.set(key=key, value=json.dumps(dummy.model_dump())) @@ -82,10 +81,10 @@ async def test_initialize_adapter_with_existing_kvstore(vector_io_adapter): async def test_persistence_across_adapter_restarts(vector_io_adapter): await vector_io_adapter.initialize() - dummy = VectorDB( + dummy = VectorStore( identifier="foo_db", provider_id="test_provider", embedding_model="test_model", embedding_dimension=128 ) - await vector_io_adapter.register_vector_db(dummy) + await vector_io_adapter.register_vector_store(dummy) await vector_io_adapter.shutdown() await vector_io_adapter.initialize() @@ -93,15 +92,15 @@ async def test_persistence_across_adapter_restarts(vector_io_adapter): await vector_io_adapter.shutdown() -async def test_register_and_unregister_vector_db(vector_io_adapter): +async def test_register_and_unregister_vector_store(vector_io_adapter): unique_id = f"foo_db_{np.random.randint(1e6)}" - dummy = VectorDB( + dummy = VectorStore( identifier=unique_id, provider_id="test_provider", embedding_model="test_model", embedding_dimension=128 ) - await vector_io_adapter.register_vector_db(dummy) + await vector_io_adapter.register_vector_store(dummy) assert dummy.identifier in vector_io_adapter.cache - await vector_io_adapter.unregister_vector_db(dummy.identifier) + await vector_io_adapter.unregister_vector_store(dummy.identifier) assert dummy.identifier not in vector_io_adapter.cache @@ -122,7 +121,7 @@ async def test_insert_chunks_calls_underlying_index(vector_io_adapter): async def test_insert_chunks_missing_db_raises(vector_io_adapter): - vector_io_adapter._get_and_cache_vector_db_index = AsyncMock(return_value=None) + vector_io_adapter._get_and_cache_vector_store_index = AsyncMock(return_value=None) with pytest.raises(ValueError): await vector_io_adapter.insert_chunks("db_not_exist", []) @@ -171,7 +170,7 @@ async def test_query_chunks_calls_underlying_index_and_returns(vector_io_adapter async def test_query_chunks_missing_db_raises(vector_io_adapter): - vector_io_adapter._get_and_cache_vector_db_index = AsyncMock(return_value=None) + vector_io_adapter._get_and_cache_vector_store_index = AsyncMock(return_value=None) with pytest.raises(ValueError): await vector_io_adapter.query_chunks("db_missing", "q", None) @@ -183,7 +182,7 @@ async def test_save_openai_vector_store(vector_io_adapter): "id": store_id, "name": "Test Store", "description": "A test OpenAI vector store", - "vector_db_id": "test_db", + "vector_store_id": "test_db", 
"embedding_model": "test_model", } @@ -199,7 +198,7 @@ async def test_update_openai_vector_store(vector_io_adapter): "id": store_id, "name": "Test Store", "description": "A test OpenAI vector store", - "vector_db_id": "test_db", + "vector_store_id": "test_db", "embedding_model": "test_model", } @@ -215,7 +214,7 @@ async def test_delete_openai_vector_store(vector_io_adapter): "id": store_id, "name": "Test Store", "description": "A test OpenAI vector store", - "vector_db_id": "test_db", + "vector_store_id": "test_db", "embedding_model": "test_model", } @@ -230,7 +229,7 @@ async def test_load_openai_vector_stores(vector_io_adapter): "id": store_id, "name": "Test Store", "description": "A test OpenAI vector store", - "vector_db_id": "test_db", + "vector_store_id": "test_db", "embedding_model": "test_model", } @@ -996,101 +995,11 @@ async def test_max_concurrent_files_per_batch(vector_io_adapter): assert batch.file_counts.in_progress == 8 -async def test_get_default_embedding_model_success(vector_io_adapter): - """Test successful default embedding model detection.""" - # Mock models API with a default model - mock_models_api = Mock() - mock_models_api.list_models = AsyncMock( - return_value=Mock( - data=[ - Model( - identifier="nomic-embed-text-v1.5", - model_type=ModelType.embedding, - provider_id="test-provider", - metadata={ - "embedding_dimension": 768, - "default_configured": True, - }, - ) - ] - ) - ) - - vector_io_adapter.models_api = mock_models_api - result = await vector_io_adapter._get_default_embedding_model_and_dimension() - - assert result is not None - model_id, dimension = result - assert model_id == "nomic-embed-text-v1.5" - assert dimension == 768 - - -async def test_get_default_embedding_model_multiple_defaults_error(vector_io_adapter): - """Test error when multiple models are marked as default.""" - mock_models_api = Mock() - mock_models_api.list_models = AsyncMock( - return_value=Mock( - data=[ - Model( - identifier="model1", - model_type=ModelType.embedding, - provider_id="test-provider", - metadata={"embedding_dimension": 768, "default_configured": True}, - ), - Model( - identifier="model2", - model_type=ModelType.embedding, - provider_id="test-provider", - metadata={"embedding_dimension": 512, "default_configured": True}, - ), - ] - ) - ) - - vector_io_adapter.models_api = mock_models_api - - with pytest.raises(ValueError, match="Multiple embedding models marked as default_configured=True"): - await vector_io_adapter._get_default_embedding_model_and_dimension() - - -async def test_openai_create_vector_store_uses_default_model(vector_io_adapter): - """Test that vector store creation uses default embedding model when none specified.""" - # Mock models API and dependencies - mock_models_api = Mock() - mock_models_api.list_models = AsyncMock( - return_value=Mock( - data=[ - Model( - identifier="default-model", - model_type=ModelType.embedding, - provider_id="test-provider", - metadata={"embedding_dimension": 512, "default_configured": True}, - ) - ] - ) - ) - - vector_io_adapter.models_api = mock_models_api - vector_io_adapter.register_vector_db = AsyncMock() - vector_io_adapter.__provider_id__ = "test-provider" - - # Create vector store without specifying embedding model - params = OpenAICreateVectorStoreRequestWithExtraBody(name="test-store") - result = await vector_io_adapter.openai_create_vector_store(params) - - # Verify the vector store was created with default model - assert result.name == "test-store" - vector_io_adapter.register_vector_db.assert_called_once() - 
call_args = vector_io_adapter.register_vector_db.call_args[0][0] - assert call_args.embedding_model == "default-model" - assert call_args.embedding_dimension == 512 - - async def test_embedding_config_from_metadata(vector_io_adapter): """Test that embedding configuration is correctly extracted from metadata.""" - # Mock register_vector_db to avoid actual registration - vector_io_adapter.register_vector_db = AsyncMock() + # Mock register_vector_store to avoid actual registration + vector_io_adapter.register_vector_store = AsyncMock() # Set provider_id attribute for the adapter vector_io_adapter.__provider_id__ = "test_provider" @@ -1106,9 +1015,9 @@ async def test_embedding_config_from_metadata(vector_io_adapter): await vector_io_adapter.openai_create_vector_store(params) - # Verify VectorDB was registered with correct embedding config from metadata - vector_io_adapter.register_vector_db.assert_called_once() - call_args = vector_io_adapter.register_vector_db.call_args[0][0] + # Verify VectorStore was registered with correct embedding config from metadata + vector_io_adapter.register_vector_store.assert_called_once() + call_args = vector_io_adapter.register_vector_store.call_args[0][0] assert call_args.embedding_model == "test-embedding-model" assert call_args.embedding_dimension == 512 @@ -1116,8 +1025,8 @@ async def test_embedding_config_from_metadata(vector_io_adapter): async def test_embedding_config_from_extra_body(vector_io_adapter): """Test that embedding configuration is correctly extracted from extra_body when metadata is empty.""" - # Mock register_vector_db to avoid actual registration - vector_io_adapter.register_vector_db = AsyncMock() + # Mock register_vector_store to avoid actual registration + vector_io_adapter.register_vector_store = AsyncMock() # Set provider_id attribute for the adapter vector_io_adapter.__provider_id__ = "test_provider" @@ -1133,9 +1042,9 @@ async def test_embedding_config_from_extra_body(vector_io_adapter): await vector_io_adapter.openai_create_vector_store(params) - # Verify VectorDB was registered with correct embedding config from extra_body - vector_io_adapter.register_vector_db.assert_called_once() - call_args = vector_io_adapter.register_vector_db.call_args[0][0] + # Verify VectorStore was registered with correct embedding config from extra_body + vector_io_adapter.register_vector_store.assert_called_once() + call_args = vector_io_adapter.register_vector_store.call_args[0][0] assert call_args.embedding_model == "extra-body-model" assert call_args.embedding_dimension == 1024 @@ -1143,8 +1052,8 @@ async def test_embedding_config_from_extra_body(vector_io_adapter): async def test_embedding_config_consistency_check_passes(vector_io_adapter): """Test that consistent embedding config in both metadata and extra_body passes validation.""" - # Mock register_vector_db to avoid actual registration - vector_io_adapter.register_vector_db = AsyncMock() + # Mock register_vector_store to avoid actual registration + vector_io_adapter.register_vector_store = AsyncMock() # Set provider_id attribute for the adapter vector_io_adapter.__provider_id__ = "test_provider" @@ -1164,8 +1073,8 @@ async def test_embedding_config_consistency_check_passes(vector_io_adapter): await vector_io_adapter.openai_create_vector_store(params) # Should not raise any error and use metadata config - vector_io_adapter.register_vector_db.assert_called_once() - call_args = vector_io_adapter.register_vector_db.call_args[0][0] + vector_io_adapter.register_vector_store.assert_called_once() + 
call_args = vector_io_adapter.register_vector_store.call_args[0][0] assert call_args.embedding_model == "consistent-model" assert call_args.embedding_dimension == 768 @@ -1173,8 +1082,8 @@ async def test_embedding_config_consistency_check_passes(vector_io_adapter): async def test_embedding_config_inconsistency_errors(vector_io_adapter): """Test that inconsistent embedding config between metadata and extra_body raises errors.""" - # Mock register_vector_db to avoid actual registration - vector_io_adapter.register_vector_db = AsyncMock() + # Mock register_vector_store to avoid actual registration + vector_io_adapter.register_vector_store = AsyncMock() # Set provider_id attribute for the adapter vector_io_adapter.__provider_id__ = "test_provider" @@ -1195,7 +1104,7 @@ async def test_embedding_config_inconsistency_errors(vector_io_adapter): await vector_io_adapter.openai_create_vector_store(params) # Reset mock for second test - vector_io_adapter.register_vector_db.reset_mock() + vector_io_adapter.register_vector_store.reset_mock() # Test with inconsistent embedding dimension params = OpenAICreateVectorStoreRequestWithExtraBody( @@ -1217,8 +1126,8 @@ async def test_embedding_config_inconsistency_errors(vector_io_adapter): async def test_embedding_config_defaults_when_missing(vector_io_adapter): """Test that embedding dimension defaults to 768 when not provided.""" - # Mock register_vector_db to avoid actual registration - vector_io_adapter.register_vector_db = AsyncMock() + # Mock register_vector_store to avoid actual registration + vector_io_adapter.register_vector_store = AsyncMock() # Set provider_id attribute for the adapter vector_io_adapter.__provider_id__ = "test_provider" @@ -1234,8 +1143,8 @@ async def test_embedding_config_defaults_when_missing(vector_io_adapter): await vector_io_adapter.openai_create_vector_store(params) # Should default to 768 dimensions - vector_io_adapter.register_vector_db.assert_called_once() - call_args = vector_io_adapter.register_vector_db.call_args[0][0] + vector_io_adapter.register_vector_store.assert_called_once() + call_args = vector_io_adapter.register_vector_store.call_args[0][0] assert call_args.embedding_model == "model-without-dimension" assert call_args.embedding_dimension == 768 @@ -1243,8 +1152,8 @@ async def test_embedding_config_defaults_when_missing(vector_io_adapter): async def test_embedding_config_required_model_missing(vector_io_adapter): """Test that missing embedding model raises error.""" - # Mock register_vector_db to avoid actual registration - vector_io_adapter.register_vector_db = AsyncMock() + # Mock register_vector_store to avoid actual registration + vector_io_adapter.register_vector_store = AsyncMock() # Set provider_id attribute for the adapter vector_io_adapter.__provider_id__ = "test_provider" # Mock the default model lookup to return None (no default model available) @@ -1253,5 +1162,5 @@ async def test_embedding_config_required_model_missing(vector_io_adapter): # Test with no embedding model provided params = OpenAICreateVectorStoreRequestWithExtraBody(name="test_store", metadata={}) - with pytest.raises(ValueError, match="embedding_model is required in extra_body when creating a vector store"): + with pytest.raises(ValueError, match="embedding_model is required"): await vector_io_adapter.openai_create_vector_store(params) diff --git a/tests/unit/rag/test_rag_query.py b/tests/unit/rag/test_rag_query.py deleted file mode 100644 index a45b66f02..000000000 --- a/tests/unit/rag/test_rag_query.py +++ /dev/null @@ -1,138 +0,0 @@ 
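
The test hunks above track the VectorDB → VectorStore rename: the resource model moves from llama_stack.apis.vector_dbs to llama_stack.apis.vector_stores, and the adapter hooks become register_vector_store / unregister_vector_store / _get_and_cache_vector_store_index. A minimal self-contained sketch of the renamed round-trip, with an AsyncMock standing in for the vector_io_adapter fixture and a plain dataclass standing in for the real pydantic VectorStore model (both stand-ins are assumptions for illustration, not llama_stack code):

    import asyncio
    from dataclasses import dataclass
    from unittest.mock import AsyncMock

    @dataclass
    class VectorStore:  # stand-in mirroring the fields exercised by the tests above
        identifier: str
        provider_id: str
        embedding_model: str
        embedding_dimension: int

    async def main() -> None:
        adapter = AsyncMock()  # plays the role of the vector_io_adapter fixture
        store = VectorStore("foo_db", "test_provider", "test_model", 128)
        await adapter.register_vector_store(store)       # new name (was register_vector_db)
        await adapter.unregister_vector_store("foo_db")  # new name (was unregister_vector_db)
        adapter.register_vector_store.assert_awaited_once_with(store)
        adapter.unregister_vector_store.assert_awaited_once_with("foo_db")

    asyncio.run(main())
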
-# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from unittest.mock import AsyncMock, MagicMock - -import pytest - -from llama_stack.apis.tools.rag_tool import RAGQueryConfig -from llama_stack.apis.vector_io import ( - Chunk, - ChunkMetadata, - QueryChunksResponse, -) -from llama_stack.providers.inline.tool_runtime.rag.memory import MemoryToolRuntimeImpl - - -class TestRagQuery: - async def test_query_raises_on_empty_vector_db_ids(self): - rag_tool = MemoryToolRuntimeImpl( - config=MagicMock(), vector_io_api=MagicMock(), inference_api=MagicMock(), files_api=MagicMock() - ) - with pytest.raises(ValueError): - await rag_tool.query(content=MagicMock(), vector_db_ids=[]) - - async def test_query_chunk_metadata_handling(self): - rag_tool = MemoryToolRuntimeImpl( - config=MagicMock(), vector_io_api=MagicMock(), inference_api=MagicMock(), files_api=MagicMock() - ) - content = "test query content" - vector_db_ids = ["db1"] - - chunk_metadata = ChunkMetadata( - document_id="doc1", - chunk_id="chunk1", - source="test_source", - metadata_token_count=5, - ) - interleaved_content = MagicMock() - chunk = Chunk( - content=interleaved_content, - metadata={ - "key1": "value1", - "token_count": 10, - "metadata_token_count": 5, - # Note this is inserted into `metadata` during MemoryToolRuntimeImpl().insert() - "document_id": "doc1", - }, - stored_chunk_id="chunk1", - chunk_metadata=chunk_metadata, - ) - - query_response = QueryChunksResponse(chunks=[chunk], scores=[1.0]) - - rag_tool.vector_io_api.query_chunks = AsyncMock(return_value=query_response) - result = await rag_tool.query(content=content, vector_db_ids=vector_db_ids) - - assert result is not None - expected_metadata_string = ( - "Metadata: {'chunk_id': 'chunk1', 'document_id': 'doc1', 'source': 'test_source', 'key1': 'value1'}" - ) - assert expected_metadata_string in result.content[1].text - assert result.content is not None - - async def test_query_raises_incorrect_mode(self): - with pytest.raises(ValueError): - RAGQueryConfig(mode="invalid_mode") - - async def test_query_accepts_valid_modes(self): - default_config = RAGQueryConfig() # Test default (vector) - assert default_config.mode == "vector" - vector_config = RAGQueryConfig(mode="vector") # Test vector - assert vector_config.mode == "vector" - keyword_config = RAGQueryConfig(mode="keyword") # Test keyword - assert keyword_config.mode == "keyword" - hybrid_config = RAGQueryConfig(mode="hybrid") # Test hybrid - assert hybrid_config.mode == "hybrid" - - # Test that invalid mode raises an error - with pytest.raises(ValueError): - RAGQueryConfig(mode="wrong_mode") - - async def test_query_adds_vector_db_id_to_chunk_metadata(self): - rag_tool = MemoryToolRuntimeImpl( - config=MagicMock(), - vector_io_api=MagicMock(), - inference_api=MagicMock(), - files_api=MagicMock(), - ) - - vector_db_ids = ["db1", "db2"] - - # Fake chunks from each DB - chunk_metadata1 = ChunkMetadata( - document_id="doc1", - chunk_id="chunk1", - source="test_source1", - metadata_token_count=5, - ) - chunk1 = Chunk( - content="chunk from db1", - metadata={"vector_db_id": "db1", "document_id": "doc1"}, - stored_chunk_id="c1", - chunk_metadata=chunk_metadata1, - ) - - chunk_metadata2 = ChunkMetadata( - document_id="doc2", - chunk_id="chunk2", - source="test_source2", - metadata_token_count=5, - ) - chunk2 = Chunk( - content="chunk from db2", - metadata={"vector_db_id": 
"db2", "document_id": "doc2"}, - stored_chunk_id="c2", - chunk_metadata=chunk_metadata2, - ) - - rag_tool.vector_io_api.query_chunks = AsyncMock( - side_effect=[ - QueryChunksResponse(chunks=[chunk1], scores=[0.9]), - QueryChunksResponse(chunks=[chunk2], scores=[0.8]), - ] - ) - - result = await rag_tool.query(content="test", vector_db_ids=vector_db_ids) - returned_chunks = result.metadata["chunks"] - returned_scores = result.metadata["scores"] - returned_doc_ids = result.metadata["document_ids"] - returned_vector_db_ids = result.metadata["vector_db_ids"] - - assert returned_chunks == ["chunk from db1", "chunk from db2"] - assert returned_scores == (0.9, 0.8) - assert returned_doc_ids == ["doc1", "doc2"] - assert returned_vector_db_ids == ["db1", "db2"] diff --git a/tests/unit/rag/test_vector_store.py b/tests/unit/rag/test_vector_store.py index 1e40c98e8..e185b83e7 100644 --- a/tests/unit/rag/test_vector_store.py +++ b/tests/unit/rag/test_vector_store.py @@ -4,10 +4,6 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -import base64 -import mimetypes -import os -from pathlib import Path from unittest.mock import AsyncMock, MagicMock import numpy as np @@ -17,37 +13,13 @@ from llama_stack.apis.inference.inference import ( OpenAIEmbeddingData, OpenAIEmbeddingsRequestWithExtraBody, ) -from llama_stack.apis.tools import RAGDocument from llama_stack.apis.vector_io import Chunk from llama_stack.providers.utils.memory.vector_store import ( - URL, - VectorDBWithIndex, + VectorStoreWithIndex, _validate_embedding, - content_from_doc, make_overlapped_chunks, ) -DUMMY_PDF_PATH = Path(os.path.abspath(__file__)).parent / "fixtures" / "dummy.pdf" -# Depending on the machine, this can get parsed a couple of ways -DUMMY_PDF_TEXT_CHOICES = ["Dummy PDF file", "Dumm y PDF file"] - - -def read_file(file_path: str) -> bytes: - with open(file_path, "rb") as file: - return file.read() - - -def data_url_from_file(file_path: str) -> str: - with open(file_path, "rb") as file: - file_content = file.read() - - base64_content = base64.b64encode(file_content).decode("utf-8") - mime_type, _ = mimetypes.guess_type(file_path) - - data_url = f"data:{mime_type};base64,{base64_content}" - - return data_url - class TestChunk: def test_chunk(self): @@ -116,45 +88,6 @@ class TestValidateEmbedding: class TestVectorStore: - async def test_returns_content_from_pdf_data_uri(self): - data_uri = data_url_from_file(DUMMY_PDF_PATH) - doc = RAGDocument( - document_id="dummy", - content=data_uri, - mime_type="application/pdf", - metadata={}, - ) - content = await content_from_doc(doc) - assert content in DUMMY_PDF_TEXT_CHOICES - - @pytest.mark.allow_network - async def test_downloads_pdf_and_returns_content(self): - # Using GitHub to host the PDF file - url = "https://raw.githubusercontent.com/meta-llama/llama-stack/da035d69cfca915318eaf485770a467ca3c2a238/llama_stack/providers/tests/memory/fixtures/dummy.pdf" - doc = RAGDocument( - document_id="dummy", - content=url, - mime_type="application/pdf", - metadata={}, - ) - content = await content_from_doc(doc) - assert content in DUMMY_PDF_TEXT_CHOICES - - @pytest.mark.allow_network - async def test_downloads_pdf_and_returns_content_with_url_object(self): - # Using GitHub to host the PDF file - url = "https://raw.githubusercontent.com/meta-llama/llama-stack/da035d69cfca915318eaf485770a467ca3c2a238/llama_stack/providers/tests/memory/fixtures/dummy.pdf" - doc = RAGDocument( - document_id="dummy", - content=URL( - uri=url, 
- ), - mime_type="application/pdf", - metadata={}, - ) - content = await content_from_doc(doc) - assert content in DUMMY_PDF_TEXT_CHOICES - @pytest.mark.parametrize( "window_len, overlap_len, expected_chunks", [ @@ -206,15 +139,15 @@ class TestVectorStore: assert str(excinfo.value.__cause__) == "Cannot convert to string" -class TestVectorDBWithIndex: +class TestVectorStoreWithIndex: async def test_insert_chunks_without_embeddings(self): - mock_vector_db = MagicMock() - mock_vector_db.embedding_model = "test-model without embeddings" + mock_vector_store = MagicMock() + mock_vector_store.embedding_model = "test-model without embeddings" mock_index = AsyncMock() mock_inference_api = AsyncMock() - vector_db_with_index = VectorDBWithIndex( - vector_db=mock_vector_db, index=mock_index, inference_api=mock_inference_api + vector_store_with_index = VectorStoreWithIndex( + vector_store=mock_vector_store, index=mock_index, inference_api=mock_inference_api ) chunks = [ @@ -227,7 +160,7 @@ class TestVectorDBWithIndex: OpenAIEmbeddingData(embedding=[0.4, 0.5, 0.6], index=1), ] - await vector_db_with_index.insert_chunks(chunks) + await vector_store_with_index.insert_chunks(chunks) # Verify openai_embeddings was called with correct params mock_inference_api.openai_embeddings.assert_called_once() @@ -243,14 +176,14 @@ class TestVectorDBWithIndex: assert np.array_equal(args[1], np.array([[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]], dtype=np.float32)) async def test_insert_chunks_with_valid_embeddings(self): - mock_vector_db = MagicMock() - mock_vector_db.embedding_model = "test-model with embeddings" - mock_vector_db.embedding_dimension = 3 + mock_vector_store = MagicMock() + mock_vector_store.embedding_model = "test-model with embeddings" + mock_vector_store.embedding_dimension = 3 mock_index = AsyncMock() mock_inference_api = AsyncMock() - vector_db_with_index = VectorDBWithIndex( - vector_db=mock_vector_db, index=mock_index, inference_api=mock_inference_api + vector_store_with_index = VectorStoreWithIndex( + vector_store=mock_vector_store, index=mock_index, inference_api=mock_inference_api ) chunks = [ @@ -258,7 +191,7 @@ class TestVectorDBWithIndex: Chunk(content="Test 2", embedding=[0.4, 0.5, 0.6], metadata={}), ] - await vector_db_with_index.insert_chunks(chunks) + await vector_store_with_index.insert_chunks(chunks) mock_inference_api.openai_embeddings.assert_not_called() mock_index.add_chunks.assert_called_once() @@ -267,14 +200,14 @@ class TestVectorDBWithIndex: assert np.array_equal(args[1], np.array([[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]], dtype=np.float32)) async def test_insert_chunks_with_invalid_embeddings(self): - mock_vector_db = MagicMock() - mock_vector_db.embedding_dimension = 3 - mock_vector_db.embedding_model = "test-model with invalid embeddings" + mock_vector_store = MagicMock() + mock_vector_store.embedding_dimension = 3 + mock_vector_store.embedding_model = "test-model with invalid embeddings" mock_index = AsyncMock() mock_inference_api = AsyncMock() - vector_db_with_index = VectorDBWithIndex( - vector_db=mock_vector_db, index=mock_index, inference_api=mock_inference_api + vector_store_with_index = VectorStoreWithIndex( + vector_store=mock_vector_store, index=mock_index, inference_api=mock_inference_api ) # Verify Chunk raises ValueError for invalid embedding type @@ -283,7 +216,7 @@ class TestVectorDBWithIndex: # Verify Chunk raises ValueError for invalid embedding type in insert_chunks (i.e., Chunk errors before insert_chunks is called) with pytest.raises(ValueError, match="Input should be a 
valid list"): - await vector_db_with_index.insert_chunks( + await vector_store_with_index.insert_chunks( [ Chunk(content="Test 1", embedding=None, metadata={}), Chunk(content="Test 2", embedding="invalid_type", metadata={}), @@ -292,7 +225,7 @@ class TestVectorDBWithIndex: # Verify Chunk raises ValueError for invalid embedding element type in insert_chunks (i.e., Chunk errors before insert_chunks is called) with pytest.raises(ValueError, match=" Input should be a valid number, unable to parse string as a number "): - await vector_db_with_index.insert_chunks( + await vector_store_with_index.insert_chunks( Chunk(content="Test 1", embedding=[0.1, "string", 0.3], metadata={}) ) @@ -300,20 +233,20 @@ class TestVectorDBWithIndex: Chunk(content="Test 1", embedding=[0.1, 0.2, 0.3, 0.4], metadata={}), ] with pytest.raises(ValueError, match="has dimension 4, expected 3"): - await vector_db_with_index.insert_chunks(chunks_wrong_dim) + await vector_store_with_index.insert_chunks(chunks_wrong_dim) mock_inference_api.openai_embeddings.assert_not_called() mock_index.add_chunks.assert_not_called() async def test_insert_chunks_with_partially_precomputed_embeddings(self): - mock_vector_db = MagicMock() - mock_vector_db.embedding_model = "test-model with partial embeddings" - mock_vector_db.embedding_dimension = 3 + mock_vector_store = MagicMock() + mock_vector_store.embedding_model = "test-model with partial embeddings" + mock_vector_store.embedding_dimension = 3 mock_index = AsyncMock() mock_inference_api = AsyncMock() - vector_db_with_index = VectorDBWithIndex( - vector_db=mock_vector_db, index=mock_index, inference_api=mock_inference_api + vector_store_with_index = VectorStoreWithIndex( + vector_store=mock_vector_store, index=mock_index, inference_api=mock_inference_api ) chunks = [ @@ -327,7 +260,7 @@ class TestVectorDBWithIndex: OpenAIEmbeddingData(embedding=[0.3, 0.3, 0.3], index=1), ] - await vector_db_with_index.insert_chunks(chunks) + await vector_store_with_index.insert_chunks(chunks) # Verify openai_embeddings was called with correct params mock_inference_api.openai_embeddings.assert_called_once() diff --git a/tests/unit/registry/test_registry.py b/tests/unit/registry/test_registry.py index e49c9dc77..d4c9786d1 100644 --- a/tests/unit/registry/test_registry.py +++ b/tests/unit/registry/test_registry.py @@ -8,24 +8,24 @@ import pytest from llama_stack.apis.inference import Model -from llama_stack.apis.vector_dbs import VectorDB -from llama_stack.core.datatypes import VectorDBWithOwner +from llama_stack.apis.vector_stores import VectorStore +from llama_stack.core.datatypes import VectorStoreWithOwner +from llama_stack.core.storage.datatypes import KVStoreReference, SqliteKVStoreConfig from llama_stack.core.store.registry import ( KEY_FORMAT, CachedDiskDistributionRegistry, DiskDistributionRegistry, ) -from llama_stack.providers.utils.kvstore import kvstore_impl -from llama_stack.providers.utils.kvstore.config import SqliteKVStoreConfig +from llama_stack.providers.utils.kvstore import kvstore_impl, register_kvstore_backends @pytest.fixture -def sample_vector_db(): - return VectorDB( - identifier="test_vector_db", +def sample_vector_store(): + return VectorStore( + identifier="test_vector_store", embedding_model="nomic-embed-text-v1.5", embedding_dimension=768, - provider_resource_id="test_vector_db", + provider_resource_id="test_vector_store", provider_id="test-provider", ) @@ -45,17 +45,17 @@ async def test_registry_initialization(disk_dist_registry): assert result is None -async def 
test_basic_registration(disk_dist_registry, sample_vector_db, sample_model): - print(f"Registering {sample_vector_db}") - await disk_dist_registry.register(sample_vector_db) +async def test_basic_registration(disk_dist_registry, sample_vector_store, sample_model): + print(f"Registering {sample_vector_store}") + await disk_dist_registry.register(sample_vector_store) print(f"Registering {sample_model}") await disk_dist_registry.register(sample_model) - print("Getting vector_db") - result_vector_db = await disk_dist_registry.get("vector_db", "test_vector_db") - assert result_vector_db is not None - assert result_vector_db.identifier == sample_vector_db.identifier - assert result_vector_db.embedding_model == sample_vector_db.embedding_model - assert result_vector_db.provider_id == sample_vector_db.provider_id + print("Getting vector_store") + result_vector_store = await disk_dist_registry.get("vector_store", "test_vector_store") + assert result_vector_store is not None + assert result_vector_store.identifier == sample_vector_store.identifier + assert result_vector_store.embedding_model == sample_vector_store.embedding_model + assert result_vector_store.provider_id == sample_vector_store.provider_id result_model = await disk_dist_registry.get("model", "test_model") assert result_model is not None @@ -63,127 +63,137 @@ async def test_basic_registration(disk_dist_registry, sample_vector_db, sample_m assert result_model.provider_id == sample_model.provider_id -async def test_cached_registry_initialization(sqlite_kvstore, sample_vector_db, sample_model): +async def test_cached_registry_initialization(sqlite_kvstore, sample_vector_store, sample_model): # First populate the disk registry disk_registry = DiskDistributionRegistry(sqlite_kvstore) await disk_registry.initialize() - await disk_registry.register(sample_vector_db) + await disk_registry.register(sample_vector_store) await disk_registry.register(sample_model) # Test cached version loads from disk db_path = sqlite_kvstore.db_path - cached_registry = CachedDiskDistributionRegistry(await kvstore_impl(SqliteKVStoreConfig(db_path=db_path))) + backend_name = "kv_cached_test" + register_kvstore_backends({backend_name: SqliteKVStoreConfig(db_path=db_path)}) + cached_registry = CachedDiskDistributionRegistry( + await kvstore_impl(KVStoreReference(backend=backend_name, namespace="registry")) + ) await cached_registry.initialize() - result_vector_db = await cached_registry.get("vector_db", "test_vector_db") - assert result_vector_db is not None - assert result_vector_db.identifier == sample_vector_db.identifier - assert result_vector_db.embedding_model == sample_vector_db.embedding_model - assert result_vector_db.embedding_dimension == sample_vector_db.embedding_dimension - assert result_vector_db.provider_id == sample_vector_db.provider_id + result_vector_store = await cached_registry.get("vector_store", "test_vector_store") + assert result_vector_store is not None + assert result_vector_store.identifier == sample_vector_store.identifier + assert result_vector_store.embedding_model == sample_vector_store.embedding_model + assert result_vector_store.embedding_dimension == sample_vector_store.embedding_dimension + assert result_vector_store.provider_id == sample_vector_store.provider_id async def test_cached_registry_updates(cached_disk_dist_registry): - new_vector_db = VectorDB( - identifier="test_vector_db_2", + new_vector_store = VectorStore( + identifier="test_vector_store_2", embedding_model="nomic-embed-text-v1.5", embedding_dimension=768, - 
provider_resource_id="test_vector_db_2", + provider_resource_id="test_vector_store_2", provider_id="baz", ) - await cached_disk_dist_registry.register(new_vector_db) + await cached_disk_dist_registry.register(new_vector_store) # Verify in cache - result_vector_db = await cached_disk_dist_registry.get("vector_db", "test_vector_db_2") - assert result_vector_db is not None - assert result_vector_db.identifier == new_vector_db.identifier - assert result_vector_db.provider_id == new_vector_db.provider_id + result_vector_store = await cached_disk_dist_registry.get("vector_store", "test_vector_store_2") + assert result_vector_store is not None + assert result_vector_store.identifier == new_vector_store.identifier + assert result_vector_store.provider_id == new_vector_store.provider_id # Verify persisted to disk db_path = cached_disk_dist_registry.kvstore.db_path - new_registry = DiskDistributionRegistry(await kvstore_impl(SqliteKVStoreConfig(db_path=db_path))) + backend_name = "kv_cached_new" + register_kvstore_backends({backend_name: SqliteKVStoreConfig(db_path=db_path)}) + new_registry = DiskDistributionRegistry( + await kvstore_impl(KVStoreReference(backend=backend_name, namespace="registry")) + ) await new_registry.initialize() - result_vector_db = await new_registry.get("vector_db", "test_vector_db_2") - assert result_vector_db is not None - assert result_vector_db.identifier == new_vector_db.identifier - assert result_vector_db.provider_id == new_vector_db.provider_id + result_vector_store = await new_registry.get("vector_store", "test_vector_store_2") + assert result_vector_store is not None + assert result_vector_store.identifier == new_vector_store.identifier + assert result_vector_store.provider_id == new_vector_store.provider_id async def test_duplicate_provider_registration(cached_disk_dist_registry): - original_vector_db = VectorDB( - identifier="test_vector_db_2", + original_vector_store = VectorStore( + identifier="test_vector_store_2", embedding_model="nomic-embed-text-v1.5", embedding_dimension=768, - provider_resource_id="test_vector_db_2", + provider_resource_id="test_vector_store_2", provider_id="baz", ) - assert await cached_disk_dist_registry.register(original_vector_db) + assert await cached_disk_dist_registry.register(original_vector_store) - duplicate_vector_db = VectorDB( - identifier="test_vector_db_2", + duplicate_vector_store = VectorStore( + identifier="test_vector_store_2", embedding_model="different-model", embedding_dimension=768, - provider_resource_id="test_vector_db_2", + provider_resource_id="test_vector_store_2", provider_id="baz", # Same provider_id ) - with pytest.raises(ValueError, match="Object of type 'vector_db' and identifier 'test_vector_db_2' already exists"): - await cached_disk_dist_registry.register(duplicate_vector_db) + with pytest.raises( + ValueError, match="Object of type 'vector_store' and identifier 'test_vector_store_2' already exists" + ): + await cached_disk_dist_registry.register(duplicate_vector_store) - result = await cached_disk_dist_registry.get("vector_db", "test_vector_db_2") + result = await cached_disk_dist_registry.get("vector_store", "test_vector_store_2") assert result is not None - assert result.embedding_model == original_vector_db.embedding_model # Original values preserved + assert result.embedding_model == original_vector_store.embedding_model # Original values preserved async def test_get_all_objects(cached_disk_dist_registry): # Create multiple test banks - test_vector_dbs = [ - VectorDB( -
identifier=f"test_vector_db_{i}", + test_vector_stores = [ + VectorStore( + identifier=f"test_vector_store_{i}", embedding_model="nomic-embed-text-v1.5", embedding_dimension=768, - provider_resource_id=f"test_vector_db_{i}", + provider_resource_id=f"test_vector_store_{i}", provider_id=f"provider_{i}", ) for i in range(3) ] - # Register all vector_dbs - for vector_db in test_vector_dbs: - await cached_disk_dist_registry.register(vector_db) + # Register all vector_stores + for vector_store in test_vector_stores: + await cached_disk_dist_registry.register(vector_store) # Test get_all retrieval all_results = await cached_disk_dist_registry.get_all() assert len(all_results) == 3 - # Verify each vector_db was stored correctly - for original_vector_db in test_vector_dbs: - matching_vector_dbs = [v for v in all_results if v.identifier == original_vector_db.identifier] - assert len(matching_vector_dbs) == 1 - stored_vector_db = matching_vector_dbs[0] - assert stored_vector_db.embedding_model == original_vector_db.embedding_model - assert stored_vector_db.provider_id == original_vector_db.provider_id - assert stored_vector_db.embedding_dimension == original_vector_db.embedding_dimension + # Verify each vector_store was stored correctly + for original_vector_store in test_vector_stores: + matching_vector_stores = [v for v in all_results if v.identifier == original_vector_store.identifier] + assert len(matching_vector_stores) == 1 + stored_vector_store = matching_vector_stores[0] + assert stored_vector_store.embedding_model == original_vector_store.embedding_model + assert stored_vector_store.provider_id == original_vector_store.provider_id + assert stored_vector_store.embedding_dimension == original_vector_store.embedding_dimension async def test_parse_registry_values_error_handling(sqlite_kvstore): - valid_db = VectorDB( - identifier="valid_vector_db", + valid_db = VectorStore( + identifier="valid_vector_store", embedding_model="nomic-embed-text-v1.5", embedding_dimension=768, - provider_resource_id="valid_vector_db", + provider_resource_id="valid_vector_store", provider_id="test-provider", ) await sqlite_kvstore.set( - KEY_FORMAT.format(type="vector_db", identifier="valid_vector_db"), valid_db.model_dump_json() + KEY_FORMAT.format(type="vector_store", identifier="valid_vector_store"), valid_db.model_dump_json() ) - await sqlite_kvstore.set(KEY_FORMAT.format(type="vector_db", identifier="corrupted_json"), "{not valid json") + await sqlite_kvstore.set(KEY_FORMAT.format(type="vector_store", identifier="corrupted_json"), "{not valid json") await sqlite_kvstore.set( - KEY_FORMAT.format(type="vector_db", identifier="missing_fields"), - '{"type": "vector_db", "identifier": "missing_fields"}', + KEY_FORMAT.format(type="vector_store", identifier="missing_fields"), + '{"type": "vector_store", "identifier": "missing_fields"}', ) test_registry = DiskDistributionRegistry(sqlite_kvstore) @@ -194,18 +204,18 @@ async def test_parse_registry_values_error_handling(sqlite_kvstore): # Should have filtered out the invalid entries assert len(all_objects) == 1 - assert all_objects[0].identifier == "valid_vector_db" + assert all_objects[0].identifier == "valid_vector_store" # Check that the get method also handles errors correctly - invalid_obj = await test_registry.get("vector_db", "corrupted_json") + invalid_obj = await test_registry.get("vector_store", "corrupted_json") assert invalid_obj is None - invalid_obj = await test_registry.get("vector_db", "missing_fields") + invalid_obj = await 
test_registry.get("vector_store", "missing_fields") assert invalid_obj is None async def test_cached_registry_error_handling(sqlite_kvstore): - valid_db = VectorDB( + valid_db = VectorStore( identifier="valid_cached_db", embedding_model="nomic-embed-text-v1.5", embedding_dimension=768, @@ -214,12 +224,12 @@ async def test_cached_registry_error_handling(sqlite_kvstore): ) await sqlite_kvstore.set( - KEY_FORMAT.format(type="vector_db", identifier="valid_cached_db"), valid_db.model_dump_json() + KEY_FORMAT.format(type="vector_store", identifier="valid_cached_db"), valid_db.model_dump_json() ) await sqlite_kvstore.set( - KEY_FORMAT.format(type="vector_db", identifier="invalid_cached_db"), - '{"type": "vector_db", "identifier": "invalid_cached_db", "embedding_model": 12345}', # Should be string + KEY_FORMAT.format(type="vector_store", identifier="invalid_cached_db"), + '{"type": "vector_store", "identifier": "invalid_cached_db", "embedding_model": 12345}', # Should be string ) cached_registry = CachedDiskDistributionRegistry(sqlite_kvstore) @@ -229,63 +239,65 @@ async def test_cached_registry_error_handling(sqlite_kvstore): assert len(all_objects) == 1 assert all_objects[0].identifier == "valid_cached_db" - invalid_obj = await cached_registry.get("vector_db", "invalid_cached_db") + invalid_obj = await cached_registry.get("vector_store", "invalid_cached_db") assert invalid_obj is None async def test_double_registration_identical_objects(disk_dist_registry): """Test that registering identical objects succeeds (idempotent).""" - vector_db = VectorDBWithOwner( - identifier="test_vector_db", + vector_store = VectorStoreWithOwner( + identifier="test_vector_store", embedding_model="all-MiniLM-L6-v2", embedding_dimension=384, - provider_resource_id="test_vector_db", + provider_resource_id="test_vector_store", provider_id="test-provider", ) # First registration should succeed - result1 = await disk_dist_registry.register(vector_db) + result1 = await disk_dist_registry.register(vector_store) assert result1 is True # Second registration of identical object should also succeed (idempotent) - result2 = await disk_dist_registry.register(vector_db) + result2 = await disk_dist_registry.register(vector_store) assert result2 is True # Verify object exists and is unchanged - retrieved = await disk_dist_registry.get("vector_db", "test_vector_db") + retrieved = await disk_dist_registry.get("vector_store", "test_vector_store") assert retrieved is not None - assert retrieved.identifier == vector_db.identifier - assert retrieved.embedding_model == vector_db.embedding_model + assert retrieved.identifier == vector_store.identifier + assert retrieved.embedding_model == vector_store.embedding_model async def test_double_registration_different_objects(disk_dist_registry): """Test that registering different objects with same identifier fails.""" - vector_db1 = VectorDBWithOwner( - identifier="test_vector_db", + vector_store1 = VectorStoreWithOwner( + identifier="test_vector_store", embedding_model="all-MiniLM-L6-v2", embedding_dimension=384, - provider_resource_id="test_vector_db", + provider_resource_id="test_vector_store", provider_id="test-provider", ) - vector_db2 = VectorDBWithOwner( - identifier="test_vector_db", # Same identifier + vector_store2 = VectorStoreWithOwner( + identifier="test_vector_store", # Same identifier embedding_model="different-model", # Different embedding model embedding_dimension=384, - provider_resource_id="test_vector_db", + provider_resource_id="test_vector_store", 
provider_id="test-provider", ) # First registration should succeed - result1 = await disk_dist_registry.register(vector_db1) + result1 = await disk_dist_registry.register(vector_store1) assert result1 is True # Second registration with different data should fail - with pytest.raises(ValueError, match="Object of type 'vector_db' and identifier 'test_vector_db' already exists"): - await disk_dist_registry.register(vector_db2) + with pytest.raises( + ValueError, match="Object of type 'vector_store' and identifier 'test_vector_store' already exists" + ): + await disk_dist_registry.register(vector_store2) # Verify original object is unchanged - retrieved = await disk_dist_registry.get("vector_db", "test_vector_db") + retrieved = await disk_dist_registry.get("vector_store", "test_vector_store") assert retrieved is not None assert retrieved.embedding_model == "all-MiniLM-L6-v2" # Original value diff --git a/tests/unit/server/test_auth.py b/tests/unit/server/test_auth.py index 04ae89db8..75cbf518b 100644 --- a/tests/unit/server/test_auth.py +++ b/tests/unit/server/test_auth.py @@ -516,6 +516,82 @@ def test_get_attributes_from_claims(): assert set(attributes["teams"]) == {"my-team", "group1", "group2"} assert attributes["namespaces"] == ["my-tenant"] + # Test nested claims with dot notation (e.g., Keycloak resource_access structure) + claims = { + "sub": "user123", + "resource_access": {"llamastack": {"roles": ["inference_max", "admin"]}, "other-client": {"roles": ["viewer"]}}, + "realm_access": {"roles": ["offline_access", "uma_authorization"]}, + } + attributes = get_attributes_from_claims( + claims, {"resource_access.llamastack.roles": "roles", "realm_access.roles": "realm_roles"} + ) + assert set(attributes["roles"]) == {"inference_max", "admin"} + assert set(attributes["realm_roles"]) == {"offline_access", "uma_authorization"} + + # Test that dot notation takes precedence over literal keys with dots + claims = { + "my.dotted.key": "literal-value", + "my": {"dotted": {"key": "nested-value"}}, + } + attributes = get_attributes_from_claims(claims, {"my.dotted.key": "test"}) + assert attributes["test"] == ["nested-value"] + + # Test that literal key works when nested traversal doesn't exist + claims = { + "my.dotted.key": "literal-value", + } + attributes = get_attributes_from_claims(claims, {"my.dotted.key": "test"}) + assert attributes["test"] == ["literal-value"] + + # Test missing nested paths are handled gracefully + claims = { + "sub": "user123", + "resource_access": {"other-client": {"roles": ["viewer"]}}, + } + attributes = get_attributes_from_claims( + claims, + { + "resource_access.llamastack.roles": "roles", # Missing nested path + "resource_access.missing.key": "missing_attr", # Missing nested path + "completely.missing.path": "another_missing", # Completely missing + "sub": "username", # Existing path + }, + ) + # Only the existing claim should be in attributes + assert attributes["username"] == ["user123"] + assert "roles" not in attributes + assert "missing_attr" not in attributes + assert "another_missing" not in attributes + + # Test mixture of flat and nested claims paths + claims = { + "sub": "user456", + "flat_key": "flat-value", + "scope": "read write admin", + "resource_access": {"app1": {"roles": ["role1", "role2"]}, "app2": {"roles": ["role3"]}}, + "groups": ["group1", "group2"], + "metadata": {"tenant": "tenant1", "region": "us-west"}, + } + attributes = get_attributes_from_claims( + claims, + { + "sub": "user_id", # Flat string + "scope": "permissions", # Flat string with 
spaces + "groups": "teams", # Flat list + "resource_access.app1.roles": "app1_roles", # Nested list + "resource_access.app2.roles": "app2_roles", # Nested list + "metadata.tenant": "tenant", # Nested string + "metadata.region": "region", # Nested string + }, + ) + assert attributes["user_id"] == ["user456"] + assert set(attributes["permissions"]) == {"read", "write", "admin"} + assert set(attributes["teams"]) == {"group1", "group2"} + assert set(attributes["app1_roles"]) == {"role1", "role2"} + assert attributes["app2_roles"] == ["role3"] + assert attributes["tenant"] == ["tenant1"] + assert attributes["region"] == ["us-west"] + # TODO: add more tests for oauth2 token provider diff --git a/tests/unit/server/test_quota.py b/tests/unit/server/test_quota.py index 85acbc66a..16b1772ce 100644 --- a/tests/unit/server/test_quota.py +++ b/tests/unit/server/test_quota.py @@ -4,6 +4,8 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. +from uuid import uuid4 + import pytest from fastapi import FastAPI, Request from fastapi.testclient import TestClient @@ -11,7 +13,8 @@ from starlette.middleware.base import BaseHTTPMiddleware from llama_stack.core.datatypes import QuotaConfig, QuotaPeriod from llama_stack.core.server.quota import QuotaMiddleware -from llama_stack.providers.utils.kvstore.config import SqliteKVStoreConfig +from llama_stack.core.storage.datatypes import KVStoreReference, SqliteKVStoreConfig +from llama_stack.providers.utils.kvstore import register_kvstore_backends class InjectClientIDMiddleware(BaseHTTPMiddleware): @@ -29,8 +32,10 @@ class InjectClientIDMiddleware(BaseHTTPMiddleware): def build_quota_config(db_path) -> QuotaConfig: + backend_name = f"kv_quota_{uuid4().hex}" + register_kvstore_backends({backend_name: SqliteKVStoreConfig(db_path=str(db_path))}) return QuotaConfig( - kvstore=SqliteKVStoreConfig(db_path=str(db_path)), + kvstore=KVStoreReference(backend=backend_name, namespace="quota"), anonymous_max_requests=1, authenticated_max_requests=2, period=QuotaPeriod.DAY, diff --git a/tests/unit/server/test_resolver.py b/tests/unit/server/test_resolver.py index 1ee1b2f47..b44f12f7e 100644 --- a/tests/unit/server/test_resolver.py +++ b/tests/unit/server/test_resolver.py @@ -12,15 +12,22 @@ from unittest.mock import AsyncMock, MagicMock from pydantic import BaseModel, Field from llama_stack.apis.inference import Inference -from llama_stack.core.datatypes import ( - Api, - Provider, - StackRunConfig, -) +from llama_stack.core.datatypes import Api, Provider, StackRunConfig from llama_stack.core.resolver import resolve_impls from llama_stack.core.routers.inference import InferenceRouter from llama_stack.core.routing_tables.models import ModelsRoutingTable +from llama_stack.core.storage.datatypes import ( + InferenceStoreReference, + KVStoreReference, + ServerStoresConfig, + SqliteKVStoreConfig, + SqliteSqlStoreConfig, + SqlStoreReference, + StorageConfig, +) from llama_stack.providers.datatypes import InlineProviderSpec, ProviderSpec +from llama_stack.providers.utils.kvstore import register_kvstore_backends +from llama_stack.providers.utils.sqlstore.sqlstore import register_sqlstore_backends def add_protocol_methods(cls: type, protocol: type[Protocol]) -> None: @@ -65,6 +72,35 @@ class SampleImpl: pass +def make_run_config(**overrides) -> StackRunConfig: + storage = overrides.pop( + "storage", + StorageConfig( + backends={ + "kv_default": SqliteKVStoreConfig(db_path=":memory:"), + "sql_default": 
SqliteSqlStoreConfig(db_path=":memory:"), + }, + stores=ServerStoresConfig( + metadata=KVStoreReference(backend="kv_default", namespace="registry"), + inference=InferenceStoreReference(backend="sql_default", table_name="inference_store"), + conversations=SqlStoreReference(backend="sql_default", table_name="conversations"), + ), + ), + ) + register_kvstore_backends({name: cfg for name, cfg in storage.backends.items() if cfg.type.value.startswith("kv_")}) + register_sqlstore_backends( + {name: cfg for name, cfg in storage.backends.items() if cfg.type.value.startswith("sql_")} + ) + defaults = dict( + image_name="test_image", + apis=[], + providers={}, + storage=storage, + ) + defaults.update(overrides) + return StackRunConfig(**defaults) + + async def test_resolve_impls_basic(): # Create a real provider spec provider_spec = InlineProviderSpec( @@ -78,7 +114,7 @@ async def test_resolve_impls_basic(): # Create provider registry with our provider provider_registry = {Api.inference: {provider_spec.provider_type: provider_spec}} - run_config = StackRunConfig( + run_config = make_run_config( image_name="test_image", providers={ "inference": [ diff --git a/tests/unit/server/test_server.py b/tests/unit/server/test_server.py index f21bbdd67..d6d4f4f23 100644 --- a/tests/unit/server/test_server.py +++ b/tests/unit/server/test_server.py @@ -41,7 +41,7 @@ class TestTranslateException: self.identifier = identifier self.owner = owner - resource = MockResource("vector_db", "test-db") + resource = MockResource("vector_store", "test-db") exc = AccessDeniedError("create", resource, user) result = translate_exception(exc) @@ -49,7 +49,7 @@ class TestTranslateException: assert isinstance(result, HTTPException) assert result.status_code == 403 assert "test-user" in result.detail - assert "vector_db::test-db" in result.detail + assert "vector_store::test-db" in result.detail assert "create" in result.detail assert "roles=['user']" in result.detail assert "teams=['dev']" in result.detail diff --git a/tests/unit/utils/inference/test_inference_store.py b/tests/unit/utils/inference/test_inference_store.py index f6d63490a..d2de1c759 100644 --- a/tests/unit/utils/inference/test_inference_store.py +++ b/tests/unit/utils/inference/test_inference_store.py @@ -5,7 +5,6 @@ # the root directory of this source tree. 
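
The make_run_config helper above and the fixture below follow the same migration: concrete SqliteSqlStoreConfig / SqliteKVStoreConfig objects are registered once under a backend name, and each store is then constructed from a lightweight reference (backend name plus namespace or table) rather than from the config itself. A self-contained sketch of that indirection; the module-level registry dict and resolve helper here are illustrative stand-ins, not the llama_stack implementations:

    from dataclasses import dataclass

    @dataclass
    class SqliteSqlStoreConfig:  # stand-in for the real config type
        db_path: str

    @dataclass
    class SqlStoreReference:  # stand-in: a store only carries backend name + table
        backend: str
        table_name: str

    _SQL_BACKENDS: dict[str, SqliteSqlStoreConfig] = {}

    def register_sqlstore_backends(backends: dict[str, SqliteSqlStoreConfig]) -> None:
        # Named backends are registered once, e.g. from StorageConfig.backends.
        _SQL_BACKENDS.update(backends)

    def resolve(ref: SqlStoreReference) -> tuple[SqliteSqlStoreConfig, str]:
        # At initialize() time a store dereferences its backend by name.
        return _SQL_BACKENDS[ref.backend], ref.table_name

    register_sqlstore_backends({"sql_default": SqliteSqlStoreConfig(db_path=":memory:")})
    config, table = resolve(SqlStoreReference(backend="sql_default", table_name="inference_store"))
    assert config.db_path == ":memory:" and table == "inference_store"
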
import time -from tempfile import TemporaryDirectory import pytest @@ -16,8 +15,16 @@ from llama_stack.apis.inference import ( OpenAIUserMessageParam, Order, ) +from llama_stack.core.storage.datatypes import InferenceStoreReference, SqliteSqlStoreConfig from llama_stack.providers.utils.inference.inference_store import InferenceStore -from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig +from llama_stack.providers.utils.sqlstore.sqlstore import register_sqlstore_backends + + +@pytest.fixture(autouse=True) +def setup_backends(tmp_path): + """Register SQL store backends for testing.""" + db_path = str(tmp_path / "test.db") + register_sqlstore_backends({"sql_default": SqliteSqlStoreConfig(db_path=db_path)}) def create_test_chat_completion( @@ -44,167 +51,162 @@ def create_test_chat_completion( async def test_inference_store_pagination_basic(): """Test basic pagination functionality.""" - with TemporaryDirectory() as tmp_dir: - db_path = tmp_dir + "/test.db" - store = InferenceStore(SqliteSqlStoreConfig(db_path=db_path), policy=[]) - await store.initialize() + reference = InferenceStoreReference(backend="sql_default", table_name="chat_completions") + store = InferenceStore(reference, policy=[]) + await store.initialize() - # Create test data with different timestamps - base_time = int(time.time()) - test_data = [ - ("zebra-task", base_time + 1), - ("apple-job", base_time + 2), - ("moon-work", base_time + 3), - ("banana-run", base_time + 4), - ("car-exec", base_time + 5), - ] + # Create test data with different timestamps + base_time = int(time.time()) + test_data = [ + ("zebra-task", base_time + 1), + ("apple-job", base_time + 2), + ("moon-work", base_time + 3), + ("banana-run", base_time + 4), + ("car-exec", base_time + 5), + ] - # Store test chat completions - for completion_id, timestamp in test_data: - completion = create_test_chat_completion(completion_id, timestamp) - input_messages = [OpenAIUserMessageParam(role="user", content=f"Test message for {completion_id}")] - await store.store_chat_completion(completion, input_messages) + # Store test chat completions + for completion_id, timestamp in test_data: + completion = create_test_chat_completion(completion_id, timestamp) + input_messages = [OpenAIUserMessageParam(role="user", content=f"Test message for {completion_id}")] + await store.store_chat_completion(completion, input_messages) - # Wait for all queued writes to complete - await store.flush() + # Wait for all queued writes to complete + await store.flush() - # Test 1: First page with limit=2, descending order (default) - result = await store.list_chat_completions(limit=2, order=Order.desc) - assert len(result.data) == 2 - assert result.data[0].id == "car-exec" # Most recent first - assert result.data[1].id == "banana-run" - assert result.has_more is True - assert result.last_id == "banana-run" + # Test 1: First page with limit=2, descending order (default) + result = await store.list_chat_completions(limit=2, order=Order.desc) + assert len(result.data) == 2 + assert result.data[0].id == "car-exec" # Most recent first + assert result.data[1].id == "banana-run" + assert result.has_more is True + assert result.last_id == "banana-run" - # Test 2: Second page using 'after' parameter - result2 = await store.list_chat_completions(after="banana-run", limit=2, order=Order.desc) - assert len(result2.data) == 2 - assert result2.data[0].id == "moon-work" - assert result2.data[1].id == "apple-job" - assert result2.has_more is True + # Test 2: Second page using 'after' 
parameter + result2 = await store.list_chat_completions(after="banana-run", limit=2, order=Order.desc) + assert len(result2.data) == 2 + assert result2.data[0].id == "moon-work" + assert result2.data[1].id == "apple-job" + assert result2.has_more is True - # Test 3: Final page - result3 = await store.list_chat_completions(after="apple-job", limit=2, order=Order.desc) - assert len(result3.data) == 1 - assert result3.data[0].id == "zebra-task" - assert result3.has_more is False + # Test 3: Final page + result3 = await store.list_chat_completions(after="apple-job", limit=2, order=Order.desc) + assert len(result3.data) == 1 + assert result3.data[0].id == "zebra-task" + assert result3.has_more is False async def test_inference_store_pagination_ascending(): """Test pagination with ascending order.""" - with TemporaryDirectory() as tmp_dir: - db_path = tmp_dir + "/test.db" - store = InferenceStore(SqliteSqlStoreConfig(db_path=db_path), policy=[]) - await store.initialize() + reference = InferenceStoreReference(backend="sql_default", table_name="chat_completions") + store = InferenceStore(reference, policy=[]) + await store.initialize() - # Create test data - base_time = int(time.time()) - test_data = [ - ("delta-item", base_time + 1), - ("charlie-task", base_time + 2), - ("alpha-work", base_time + 3), - ] + # Create test data + base_time = int(time.time()) + test_data = [ + ("delta-item", base_time + 1), + ("charlie-task", base_time + 2), + ("alpha-work", base_time + 3), + ] - # Store test chat completions - for completion_id, timestamp in test_data: - completion = create_test_chat_completion(completion_id, timestamp) - input_messages = [OpenAIUserMessageParam(role="user", content=f"Test message for {completion_id}")] - await store.store_chat_completion(completion, input_messages) + # Store test chat completions + for completion_id, timestamp in test_data: + completion = create_test_chat_completion(completion_id, timestamp) + input_messages = [OpenAIUserMessageParam(role="user", content=f"Test message for {completion_id}")] + await store.store_chat_completion(completion, input_messages) - # Wait for all queued writes to complete - await store.flush() + # Wait for all queued writes to complete + await store.flush() - # Test ascending order pagination - result = await store.list_chat_completions(limit=1, order=Order.asc) - assert len(result.data) == 1 - assert result.data[0].id == "delta-item" # Oldest first - assert result.has_more is True + # Test ascending order pagination + result = await store.list_chat_completions(limit=1, order=Order.asc) + assert len(result.data) == 1 + assert result.data[0].id == "delta-item" # Oldest first + assert result.has_more is True - # Second page with ascending order - result2 = await store.list_chat_completions(after="delta-item", limit=1, order=Order.asc) - assert len(result2.data) == 1 - assert result2.data[0].id == "charlie-task" - assert result2.has_more is True + # Second page with ascending order + result2 = await store.list_chat_completions(after="delta-item", limit=1, order=Order.asc) + assert len(result2.data) == 1 + assert result2.data[0].id == "charlie-task" + assert result2.has_more is True async def test_inference_store_pagination_with_model_filter(): """Test pagination combined with model filtering.""" - with TemporaryDirectory() as tmp_dir: - db_path = tmp_dir + "/test.db" - store = InferenceStore(SqliteSqlStoreConfig(db_path=db_path), policy=[]) - await store.initialize() + reference = InferenceStoreReference(backend="sql_default", 
table_name="chat_completions") + store = InferenceStore(reference, policy=[]) + await store.initialize() - # Create test data with different models - base_time = int(time.time()) - test_data = [ - ("xyz-task", base_time + 1, "model-a"), - ("def-work", base_time + 2, "model-b"), - ("pqr-job", base_time + 3, "model-a"), - ("abc-run", base_time + 4, "model-b"), - ] + # Create test data with different models + base_time = int(time.time()) + test_data = [ + ("xyz-task", base_time + 1, "model-a"), + ("def-work", base_time + 2, "model-b"), + ("pqr-job", base_time + 3, "model-a"), + ("abc-run", base_time + 4, "model-b"), + ] - # Store test chat completions - for completion_id, timestamp, model in test_data: - completion = create_test_chat_completion(completion_id, timestamp, model) - input_messages = [OpenAIUserMessageParam(role="user", content=f"Test message for {completion_id}")] - await store.store_chat_completion(completion, input_messages) + # Store test chat completions + for completion_id, timestamp, model in test_data: + completion = create_test_chat_completion(completion_id, timestamp, model) + input_messages = [OpenAIUserMessageParam(role="user", content=f"Test message for {completion_id}")] + await store.store_chat_completion(completion, input_messages) - # Wait for all queued writes to complete - await store.flush() + # Wait for all queued writes to complete + await store.flush() - # Test pagination with model filter - result = await store.list_chat_completions(limit=1, model="model-a", order=Order.desc) - assert len(result.data) == 1 - assert result.data[0].id == "pqr-job" # Most recent model-a - assert result.data[0].model == "model-a" - assert result.has_more is True + # Test pagination with model filter + result = await store.list_chat_completions(limit=1, model="model-a", order=Order.desc) + assert len(result.data) == 1 + assert result.data[0].id == "pqr-job" # Most recent model-a + assert result.data[0].model == "model-a" + assert result.has_more is True - # Second page with model filter - result2 = await store.list_chat_completions(after="pqr-job", limit=1, model="model-a", order=Order.desc) - assert len(result2.data) == 1 - assert result2.data[0].id == "xyz-task" - assert result2.data[0].model == "model-a" - assert result2.has_more is False + # Second page with model filter + result2 = await store.list_chat_completions(after="pqr-job", limit=1, model="model-a", order=Order.desc) + assert len(result2.data) == 1 + assert result2.data[0].id == "xyz-task" + assert result2.data[0].model == "model-a" + assert result2.has_more is False async def test_inference_store_pagination_invalid_after(): """Test error handling for invalid 'after' parameter.""" - with TemporaryDirectory() as tmp_dir: - db_path = tmp_dir + "/test.db" - store = InferenceStore(SqliteSqlStoreConfig(db_path=db_path), policy=[]) - await store.initialize() + reference = InferenceStoreReference(backend="sql_default", table_name="chat_completions") + store = InferenceStore(reference, policy=[]) + await store.initialize() - # Try to paginate with non-existent ID - with pytest.raises(ValueError, match="Record with id='non-existent' not found in table 'chat_completions'"): - await store.list_chat_completions(after="non-existent", limit=2) + # Try to paginate with non-existent ID + with pytest.raises(ValueError, match="Record with id='non-existent' not found in table 'chat_completions'"): + await store.list_chat_completions(after="non-existent", limit=2) async def test_inference_store_pagination_no_limit(): """Test pagination 
behavior when no limit is specified.""" - with TemporaryDirectory() as tmp_dir: - db_path = tmp_dir + "/test.db" - store = InferenceStore(SqliteSqlStoreConfig(db_path=db_path), policy=[]) - await store.initialize() + reference = InferenceStoreReference(backend="sql_default", table_name="chat_completions") + store = InferenceStore(reference, policy=[]) + await store.initialize() - # Create test data - base_time = int(time.time()) - test_data = [ - ("omega-first", base_time + 1), - ("beta-second", base_time + 2), - ] + # Create test data + base_time = int(time.time()) + test_data = [ + ("omega-first", base_time + 1), + ("beta-second", base_time + 2), + ] - # Store test chat completions - for completion_id, timestamp in test_data: - completion = create_test_chat_completion(completion_id, timestamp) - input_messages = [OpenAIUserMessageParam(role="user", content=f"Test message for {completion_id}")] - await store.store_chat_completion(completion, input_messages) + # Store test chat completions + for completion_id, timestamp in test_data: + completion = create_test_chat_completion(completion_id, timestamp) + input_messages = [OpenAIUserMessageParam(role="user", content=f"Test message for {completion_id}")] + await store.store_chat_completion(completion, input_messages) - # Wait for all queued writes to complete - await store.flush() + # Wait for all queued writes to complete + await store.flush() - # Test without limit - result = await store.list_chat_completions(order=Order.desc) - assert len(result.data) == 2 - assert result.data[0].id == "beta-second" # Most recent first - assert result.data[1].id == "omega-first" - assert result.has_more is False + # Test without limit + result = await store.list_chat_completions(order=Order.desc) + assert len(result.data) == 2 + assert result.data[0].id == "beta-second" # Most recent first + assert result.data[1].id == "omega-first" + assert result.has_more is False diff --git a/tests/unit/utils/responses/test_responses_store.py b/tests/unit/utils/responses/test_responses_store.py index c27b5a8e5..34cff3d3f 100644 --- a/tests/unit/utils/responses/test_responses_store.py +++ b/tests/unit/utils/responses/test_responses_store.py @@ -6,6 +6,7 @@ import time from tempfile import TemporaryDirectory +from uuid import uuid4 import pytest @@ -15,8 +16,18 @@ from llama_stack.apis.agents.openai_responses import ( OpenAIResponseObject, ) from llama_stack.apis.inference import OpenAIMessageParam, OpenAIUserMessageParam +from llama_stack.core.storage.datatypes import ResponsesStoreReference, SqliteSqlStoreConfig from llama_stack.providers.utils.responses.responses_store import ResponsesStore -from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig +from llama_stack.providers.utils.sqlstore.sqlstore import register_sqlstore_backends + + +def build_store(db_path: str, policy: list | None = None) -> ResponsesStore: + backend_name = f"sql_responses_{uuid4().hex}" + register_sqlstore_backends({backend_name: SqliteSqlStoreConfig(db_path=db_path)}) + return ResponsesStore( + ResponsesStoreReference(backend=backend_name, table_name="responses"), + policy=policy or [], + ) def create_test_response_object( @@ -54,7 +65,7 @@ async def test_responses_store_pagination_basic(): """Test basic pagination functionality for responses store.""" with TemporaryDirectory() as tmp_dir: db_path = tmp_dir + "/test.db" - store = ResponsesStore(SqliteSqlStoreConfig(db_path=db_path), policy=[]) + store = build_store(db_path) await store.initialize() # Create test data with 
+def build_store(db_path: str, policy: list | None = None) -> ResponsesStore:
+    backend_name = f"sql_responses_{uuid4().hex}"
+    register_sqlstore_backends({backend_name: SqliteSqlStoreConfig(db_path=db_path)})
+    return ResponsesStore(
+        ResponsesStoreReference(backend=backend_name, table_name="responses"),
+        policy=policy or [],
+    )


 def create_test_response_object(
@@ -54,7 +65,7 @@ async def test_responses_store_pagination_basic():
     """Test basic pagination functionality for responses store."""
     with TemporaryDirectory() as tmp_dir:
         db_path = tmp_dir + "/test.db"
-        store = ResponsesStore(SqliteSqlStoreConfig(db_path=db_path), policy=[])
+        store = build_store(db_path)
         await store.initialize()

         # Create test data with different timestamps
@@ -103,7 +114,7 @@ async def test_responses_store_pagination_ascending():
     """Test pagination with ascending order."""
     with TemporaryDirectory() as tmp_dir:
         db_path = tmp_dir + "/test.db"
-        store = ResponsesStore(SqliteSqlStoreConfig(db_path=db_path), policy=[])
+        store = build_store(db_path)
         await store.initialize()

         # Create test data
@@ -141,7 +152,7 @@ async def test_responses_store_pagination_with_model_filter():
     """Test pagination combined with model filtering."""
     with TemporaryDirectory() as tmp_dir:
         db_path = tmp_dir + "/test.db"
-        store = ResponsesStore(SqliteSqlStoreConfig(db_path=db_path), policy=[])
+        store = build_store(db_path)
         await store.initialize()

         # Create test data with different models
@@ -182,7 +193,7 @@ async def test_responses_store_pagination_invalid_after():
     """Test error handling for invalid 'after' parameter."""
     with TemporaryDirectory() as tmp_dir:
         db_path = tmp_dir + "/test.db"
-        store = ResponsesStore(SqliteSqlStoreConfig(db_path=db_path), policy=[])
+        store = build_store(db_path)
         await store.initialize()

         # Try to paginate with non-existent ID
@@ -194,7 +205,7 @@ async def test_responses_store_pagination_no_limit():
     """Test pagination behavior when no limit is specified."""
     with TemporaryDirectory() as tmp_dir:
         db_path = tmp_dir + "/test.db"
-        store = ResponsesStore(SqliteSqlStoreConfig(db_path=db_path), policy=[])
+        store = build_store(db_path)
         await store.initialize()

         # Create test data
@@ -226,7 +237,7 @@ async def test_responses_store_get_response_object():
     """Test retrieving a single response object."""
     with TemporaryDirectory() as tmp_dir:
         db_path = tmp_dir + "/test.db"
-        store = ResponsesStore(SqliteSqlStoreConfig(db_path=db_path), policy=[])
+        store = build_store(db_path)
         await store.initialize()

         # Store a test response
@@ -254,7 +265,7 @@ async def test_responses_store_input_items_pagination():
     """Test pagination functionality for input items."""
     with TemporaryDirectory() as tmp_dir:
         db_path = tmp_dir + "/test.db"
-        store = ResponsesStore(SqliteSqlStoreConfig(db_path=db_path), policy=[])
+        store = build_store(db_path)
         await store.initialize()

         # Store a test response with many inputs with explicit IDs
@@ -335,7 +346,7 @@ async def test_responses_store_input_items_before_pagination():
     """Test before pagination functionality for input items."""
     with TemporaryDirectory() as tmp_dir:
         db_path = tmp_dir + "/test.db"
-        store = ResponsesStore(SqliteSqlStoreConfig(db_path=db_path), policy=[])
+        store = build_store(db_path)
         await store.initialize()

         # Store a test response with many inputs with explicit IDs
diff --git a/uv.lock b/uv.lock
index f9806123d..bbc917df5 100644
--- a/uv.lock
+++ b/uv.lock
@@ -921,16 +921,16 @@ wheels = [

 [[package]]
 name = "fastapi"
-version = "0.116.1"
+version = "0.119.0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "pydantic" },
     { name = "starlette" },
     { name = "typing-extensions" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/78/d7/6c8b3bfe33eeffa208183ec037fee0cce9f7f024089ab1c5d12ef04bd27c/fastapi-0.116.1.tar.gz", hash = "sha256:ed52cbf946abfd70c5a0dccb24673f0670deeb517a88b3544d03c2a6bf283143", size = 296485, upload-time = "2025-07-11T16:22:32.057Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/0a/f9/5c5bcce82a7997cc0eb8c47b7800f862f6b56adc40486ed246e5010d443b/fastapi-0.119.0.tar.gz", hash = "sha256:451082403a2c1f0b99c6bd57c09110ed5463856804c8078d38e5a1f1035dbbb7", size = 336756, upload-time = "2025-10-11T17:13:40.53Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/e5/47/d63c60f59a59467fda0f93f46335c9d18526d7071f025cb5b89d5353ea42/fastapi-0.116.1-py3-none-any.whl", hash = "sha256:c46ac7c312df840f0c9e220f7964bada936781bc4e2e6eb71f1c4d7553786565", size = 95631, upload-time = "2025-07-11T16:22:30.485Z" },
+    { url = "https://files.pythonhosted.org/packages/ce/70/584c4d7cad80f5e833715c0a29962d7c93b4d18eed522a02981a6d1b6ee5/fastapi-0.119.0-py3-none-any.whl", hash = "sha256:90a2e49ed19515320abb864df570dd766be0662c5d577688f1600170f7f73cf2", size = 107095, upload-time = "2025-10-11T17:13:39.048Z" },
 ]

 [[package]]
@@ -4129,27 +4129,28 @@ wheels = [

 [[package]]
 name = "ruff"
-version = "0.9.10"
+version = "0.14.1"
 source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/20/8e/fafaa6f15c332e73425d9c44ada85360501045d5ab0b81400076aff27cf6/ruff-0.9.10.tar.gz", hash = "sha256:9bacb735d7bada9cfb0f2c227d3658fc443d90a727b47f206fb33f52f3c0eac7", size = 3759776, upload-time = "2025-03-07T15:27:44.363Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/9e/58/6ca66896635352812de66f71cdf9ff86b3a4f79071ca5730088c0cd0fc8d/ruff-0.14.1.tar.gz", hash = "sha256:1dd86253060c4772867c61791588627320abcb6ed1577a90ef432ee319729b69", size = 5513429, upload-time = "2025-10-16T18:05:41.766Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/73/b2/af7c2cc9e438cbc19fafeec4f20bfcd72165460fe75b2b6e9a0958c8c62b/ruff-0.9.10-py3-none-linux_armv6l.whl", hash = "sha256:eb4d25532cfd9fe461acc83498361ec2e2252795b4f40b17e80692814329e42d", size = 10049494, upload-time = "2025-03-07T15:26:51.268Z" },
-    { url = "https://files.pythonhosted.org/packages/6d/12/03f6dfa1b95ddd47e6969f0225d60d9d7437c91938a310835feb27927ca0/ruff-0.9.10-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:188a6638dab1aa9bb6228a7302387b2c9954e455fb25d6b4470cb0641d16759d", size = 10853584, upload-time = "2025-03-07T15:26:56.104Z" },
-    { url = "https://files.pythonhosted.org/packages/02/49/1c79e0906b6ff551fb0894168763f705bf980864739572b2815ecd3c9df0/ruff-0.9.10-py3-none-macosx_11_0_arm64.whl", hash = "sha256:5284dcac6b9dbc2fcb71fdfc26a217b2ca4ede6ccd57476f52a587451ebe450d", size = 10155692, upload-time = "2025-03-07T15:27:01.385Z" },
-    { url = "https://files.pythonhosted.org/packages/5b/01/85e8082e41585e0e1ceb11e41c054e9e36fed45f4b210991052d8a75089f/ruff-0.9.10-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:47678f39fa2a3da62724851107f438c8229a3470f533894b5568a39b40029c0c", size = 10369760, upload-time = "2025-03-07T15:27:04.023Z" },
-    { url = "https://files.pythonhosted.org/packages/a1/90/0bc60bd4e5db051f12445046d0c85cc2c617095c0904f1aa81067dc64aea/ruff-0.9.10-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:99713a6e2766b7a17147b309e8c915b32b07a25c9efd12ada79f217c9c778b3e", size = 9912196, upload-time = "2025-03-07T15:27:06.93Z" },
-    { url = "https://files.pythonhosted.org/packages/66/ea/0b7e8c42b1ec608033c4d5a02939c82097ddcb0b3e393e4238584b7054ab/ruff-0.9.10-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:524ee184d92f7c7304aa568e2db20f50c32d1d0caa235d8ddf10497566ea1a12", size = 11434985, upload-time = "2025-03-07T15:27:10.082Z" },
-    { url = "https://files.pythonhosted.org/packages/d5/86/3171d1eff893db4f91755175a6e1163c5887be1f1e2f4f6c0c59527c2bfd/ruff-0.9.10-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:df92aeac30af821f9acf819fc01b4afc3dfb829d2782884f8739fb52a8119a16", size = 12155842, upload-time = "2025-03-07T15:27:12.727Z" },
-    { url = "https://files.pythonhosted.org/packages/89/9e/700ca289f172a38eb0bca752056d0a42637fa17b81649b9331786cb791d7/ruff-0.9.10-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:de42e4edc296f520bb84954eb992a07a0ec5a02fecb834498415908469854a52", size = 11613804, upload-time = "2025-03-07T15:27:15.944Z" },
-    { url = "https://files.pythonhosted.org/packages/f2/92/648020b3b5db180f41a931a68b1c8575cca3e63cec86fd26807422a0dbad/ruff-0.9.10-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d257f95b65806104b6b1ffca0ea53f4ef98454036df65b1eda3693534813ecd1", size = 13823776, upload-time = "2025-03-07T15:27:18.996Z" },
-    { url = "https://files.pythonhosted.org/packages/5e/a6/cc472161cd04d30a09d5c90698696b70c169eeba2c41030344194242db45/ruff-0.9.10-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b60dec7201c0b10d6d11be00e8f2dbb6f40ef1828ee75ed739923799513db24c", size = 11302673, upload-time = "2025-03-07T15:27:21.655Z" },
-    { url = "https://files.pythonhosted.org/packages/6c/db/d31c361c4025b1b9102b4d032c70a69adb9ee6fde093f6c3bf29f831c85c/ruff-0.9.10-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:d838b60007da7a39c046fcdd317293d10b845001f38bcb55ba766c3875b01e43", size = 10235358, upload-time = "2025-03-07T15:27:24.72Z" },
-    { url = "https://files.pythonhosted.org/packages/d1/86/d6374e24a14d4d93ebe120f45edd82ad7dcf3ef999ffc92b197d81cdc2a5/ruff-0.9.10-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:ccaf903108b899beb8e09a63ffae5869057ab649c1e9231c05ae354ebc62066c", size = 9886177, upload-time = "2025-03-07T15:27:27.282Z" },
-    { url = "https://files.pythonhosted.org/packages/00/62/a61691f6eaaac1e945a1f3f59f1eea9a218513139d5b6c2b8f88b43b5b8f/ruff-0.9.10-py3-none-musllinux_1_2_i686.whl", hash = "sha256:f9567d135265d46e59d62dc60c0bfad10e9a6822e231f5b24032dba5a55be6b5", size = 10864747, upload-time = "2025-03-07T15:27:30.637Z" },
-    { url = "https://files.pythonhosted.org/packages/ee/94/2c7065e1d92a8a8a46d46d9c3cf07b0aa7e0a1e0153d74baa5e6620b4102/ruff-0.9.10-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:5f202f0d93738c28a89f8ed9eaba01b7be339e5d8d642c994347eaa81c6d75b8", size = 11360441, upload-time = "2025-03-07T15:27:33.356Z" },
-    { url = "https://files.pythonhosted.org/packages/a7/8f/1f545ea6f9fcd7bf4368551fb91d2064d8f0577b3079bb3f0ae5779fb773/ruff-0.9.10-py3-none-win32.whl", hash = "sha256:bfb834e87c916521ce46b1788fbb8484966e5113c02df216680102e9eb960029", size = 10247401, upload-time = "2025-03-07T15:27:35.994Z" },
-    { url = "https://files.pythonhosted.org/packages/4f/18/fb703603ab108e5c165f52f5b86ee2aa9be43bb781703ec87c66a5f5d604/ruff-0.9.10-py3-none-win_amd64.whl", hash = "sha256:f2160eeef3031bf4b17df74e307d4c5fb689a6f3a26a2de3f7ef4044e3c484f1", size = 11366360, upload-time = "2025-03-07T15:27:38.66Z" },
-    { url = "https://files.pythonhosted.org/packages/35/85/338e603dc68e7d9994d5d84f24adbf69bae760ba5efd3e20f5ff2cec18da/ruff-0.9.10-py3-none-win_arm64.whl", hash = "sha256:5fd804c0327a5e5ea26615550e706942f348b197d5475ff34c19733aee4b2e69", size = 10436892, upload-time = "2025-03-07T15:27:41.687Z" },
+    { url = "https://files.pythonhosted.org/packages/8d/39/9cc5ab181478d7a18adc1c1e051a84ee02bec94eb9bdfd35643d7c74ca31/ruff-0.14.1-py3-none-linux_armv6l.whl", hash = "sha256:083bfc1f30f4a391ae09c6f4f99d83074416b471775b59288956f5bc18e82f8b", size = 12445415, upload-time = "2025-10-16T18:04:48.227Z" },
+    { url = "https://files.pythonhosted.org/packages/ef/2e/1226961855ccd697255988f5a2474890ac7c5863b080b15bd038df820818/ruff-0.14.1-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:f6fa757cd717f791009f7669fefb09121cc5f7d9bd0ef211371fad68c2b8b224", size = 12784267, upload-time = "2025-10-16T18:04:52.515Z" },
+    { url = "https://files.pythonhosted.org/packages/c1/ea/fd9e95863124ed159cd0667ec98449ae461de94acda7101f1acb6066da00/ruff-0.14.1-py3-none-macosx_11_0_arm64.whl", hash = "sha256:d6191903d39ac156921398e9c86b7354d15e3c93772e7dbf26c9fcae59ceccd5", size = 11781872, upload-time = "2025-10-16T18:04:55.396Z" },
+    { url = "https://files.pythonhosted.org/packages/1e/5a/e890f7338ff537dba4589a5e02c51baa63020acfb7c8cbbaea4831562c96/ruff-0.14.1-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ed04f0e04f7a4587244e5c9d7df50e6b5bf2705d75059f409a6421c593a35896", size = 12226558, upload-time = "2025-10-16T18:04:58.166Z" },
+    { url = "https://files.pythonhosted.org/packages/a6/7a/8ab5c3377f5bf31e167b73651841217542bcc7aa1c19e83030835cc25204/ruff-0.14.1-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:5c9e6cf6cd4acae0febbce29497accd3632fe2025c0c583c8b87e8dbdeae5f61", size = 12187898, upload-time = "2025-10-16T18:05:01.455Z" },
+    { url = "https://files.pythonhosted.org/packages/48/8d/ba7c33aa55406955fc124e62c8259791c3d42e3075a71710fdff9375134f/ruff-0.14.1-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a6fa2458527794ecdfbe45f654e42c61f2503a230545a91af839653a0a93dbc6", size = 12939168, upload-time = "2025-10-16T18:05:04.397Z" },
+    { url = "https://files.pythonhosted.org/packages/b4/c2/70783f612b50f66d083380e68cbd1696739d88e9b4f6164230375532c637/ruff-0.14.1-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:39f1c392244e338b21d42ab29b8a6392a722c5090032eb49bb4d6defcdb34345", size = 14386942, upload-time = "2025-10-16T18:05:07.102Z" },
+    { url = "https://files.pythonhosted.org/packages/48/44/cd7abb9c776b66d332119d67f96acf15830d120f5b884598a36d9d3f4d83/ruff-0.14.1-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7382fa12a26cce1f95070ce450946bec357727aaa428983036362579eadcc5cf", size = 13990622, upload-time = "2025-10-16T18:05:09.882Z" },
+    { url = "https://files.pythonhosted.org/packages/eb/56/4259b696db12ac152fe472764b4f78bbdd9b477afd9bc3a6d53c01300b37/ruff-0.14.1-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:dd0bf2be3ae8521e1093a487c4aa3b455882f139787770698530d28ed3fbb37c", size = 13431143, upload-time = "2025-10-16T18:05:13.46Z" },
+    { url = "https://files.pythonhosted.org/packages/e0/35/266a80d0eb97bd224b3265b9437bd89dde0dcf4faf299db1212e81824e7e/ruff-0.14.1-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cabcaa9ccf8089fb4fdb78d17cc0e28241520f50f4c2e88cb6261ed083d85151", size = 13132844, upload-time = "2025-10-16T18:05:16.1Z" },
+    { url = "https://files.pythonhosted.org/packages/65/6e/d31ce218acc11a8d91ef208e002a31acf315061a85132f94f3df7a252b18/ruff-0.14.1-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:747d583400f6125ec11a4c14d1c8474bf75d8b419ad22a111a537ec1a952d192", size = 13401241, upload-time = "2025-10-16T18:05:19.395Z" },
+    { url = "https://files.pythonhosted.org/packages/9f/b5/dbc4221bf0b03774b3b2f0d47f39e848d30664157c15b965a14d890637d2/ruff-0.14.1-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:5a6e74c0efd78515a1d13acbfe6c90f0f5bd822aa56b4a6d43a9ffb2ae6e56cd", size = 12132476, upload-time = "2025-10-16T18:05:22.163Z" },
+    { url = "https://files.pythonhosted.org/packages/98/4b/ac99194e790ccd092d6a8b5f341f34b6e597d698e3077c032c502d75ea84/ruff-0.14.1-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:0ea6a864d2fb41a4b6d5b456ed164302a0d96f4daac630aeba829abfb059d020", size = 12139749, upload-time = "2025-10-16T18:05:25.162Z" },
+    { url = "https://files.pythonhosted.org/packages/47/26/7df917462c3bb5004e6fdfcc505a49e90bcd8a34c54a051953118c00b53a/ruff-0.14.1-py3-none-musllinux_1_2_i686.whl", hash = "sha256:0826b8764f94229604fa255918d1cc45e583e38c21c203248b0bfc9a0e930be5", size = 12544758, upload-time = "2025-10-16T18:05:28.018Z" },
+    { url = "https://files.pythonhosted.org/packages/64/d0/81e7f0648e9764ad9b51dd4be5e5dac3fcfff9602428ccbae288a39c2c22/ruff-0.14.1-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:cbc52160465913a1a3f424c81c62ac8096b6a491468e7d872cb9444a860bc33d", size = 13221811, upload-time = "2025-10-16T18:05:30.707Z" },
+    { url = "https://files.pythonhosted.org/packages/c3/07/3c45562c67933cc35f6d5df4ca77dabbcd88fddaca0d6b8371693d29fd56/ruff-0.14.1-py3-none-win32.whl", hash = "sha256:e037ea374aaaff4103240ae79168c0945ae3d5ae8db190603de3b4012bd1def6", size = 12319467, upload-time = "2025-10-16T18:05:33.261Z" },
+    { url = "https://files.pythonhosted.org/packages/02/88/0ee4ca507d4aa05f67e292d2e5eb0b3e358fbcfe527554a2eda9ac422d6b/ruff-0.14.1-py3-none-win_amd64.whl", hash = "sha256:59d599cdff9c7f925a017f6f2c256c908b094e55967f93f2821b1439928746a1", size = 13401123, upload-time = "2025-10-16T18:05:35.984Z" },
+    { url = "https://files.pythonhosted.org/packages/b8/81/4b6387be7014858d924b843530e1b2a8e531846807516e9bea2ee0936bf7/ruff-0.14.1-py3-none-win_arm64.whl", hash = "sha256:e3b443c4c9f16ae850906b8d0a707b2a4c16f8d2f0a7fe65c475c5886665ce44", size = 12436636, upload-time = "2025-10-16T18:05:38.995Z" },
 ]

 [[package]]
@@ -4525,31 +4526,31 @@ wheels = [

 [[package]]
 name = "sqlalchemy"
-version = "2.0.41"
+version = "2.0.44"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "greenlet", marker = "(python_full_version < '3.14' and platform_machine == 'AMD64') or (python_full_version < '3.14' and platform_machine == 'WIN32') or (python_full_version < '3.14' and platform_machine == 'aarch64') or (python_full_version < '3.14' and platform_machine == 'amd64') or (python_full_version < '3.14' and platform_machine == 'ppc64le') or (python_full_version < '3.14' and platform_machine == 'win32') or (python_full_version < '3.14' and platform_machine == 'x86_64')" },
+    { name = "greenlet", marker = "platform_machine == 'AMD64' or platform_machine == 'WIN32' or platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'ppc64le' or platform_machine == 'win32' or platform_machine == 'x86_64'" },
     { name = "typing-extensions" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/63/66/45b165c595ec89aa7dcc2c1cd222ab269bc753f1fc7a1e68f8481bd957bf/sqlalchemy-2.0.41.tar.gz", hash = "sha256:edba70118c4be3c2b1f90754d308d0b79c6fe2c0fdc52d8ddf603916f83f4db9", size = 9689424, upload-time = "2025-05-14T17:10:32.339Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/f0/f2/840d7b9496825333f532d2e3976b8eadbf52034178aac53630d09fe6e1ef/sqlalchemy-2.0.44.tar.gz", hash = "sha256:0ae7454e1ab1d780aee69fd2aae7d6b8670a581d8847f2d1e0f7ddfbf47e5a22", size = 9819830, upload-time = "2025-10-10T14:39:12.935Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/3e/2a/f1f4e068b371154740dd10fb81afb5240d5af4aa0087b88d8b308b5429c2/sqlalchemy-2.0.41-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:81f413674d85cfd0dfcd6512e10e0f33c19c21860342a4890c3a2b59479929f9", size = 2119645, upload-time = "2025-05-14T17:55:24.854Z" },
-    { url = "https://files.pythonhosted.org/packages/9b/e8/c664a7e73d36fbfc4730f8cf2bf930444ea87270f2825efbe17bf808b998/sqlalchemy-2.0.41-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:598d9ebc1e796431bbd068e41e4de4dc34312b7aa3292571bb3674a0cb415dd1", size = 2107399, upload-time = "2025-05-14T17:55:28.097Z" },
-    { url = "https://files.pythonhosted.org/packages/5c/78/8a9cf6c5e7135540cb682128d091d6afa1b9e48bd049b0d691bf54114f70/sqlalchemy-2.0.41-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a104c5694dfd2d864a6f91b0956eb5d5883234119cb40010115fd45a16da5e70", size = 3293269, upload-time = "2025-05-14T17:50:38.227Z" },
-    { url = "https://files.pythonhosted.org/packages/3c/35/f74add3978c20de6323fb11cb5162702670cc7a9420033befb43d8d5b7a4/sqlalchemy-2.0.41-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6145afea51ff0af7f2564a05fa95eb46f542919e6523729663a5d285ecb3cf5e", size = 3303364, upload-time = "2025-05-14T17:51:49.829Z" },
-    { url = "https://files.pythonhosted.org/packages/6a/d4/c990f37f52c3f7748ebe98883e2a0f7d038108c2c5a82468d1ff3eec50b7/sqlalchemy-2.0.41-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:b46fa6eae1cd1c20e6e6f44e19984d438b6b2d8616d21d783d150df714f44078", size = 3229072, upload-time = "2025-05-14T17:50:39.774Z" },
-    { url = "https://files.pythonhosted.org/packages/15/69/cab11fecc7eb64bc561011be2bd03d065b762d87add52a4ca0aca2e12904/sqlalchemy-2.0.41-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:41836fe661cc98abfae476e14ba1906220f92c4e528771a8a3ae6a151242d2ae", size = 3268074, upload-time = "2025-05-14T17:51:51.736Z" },
-    { url = "https://files.pythonhosted.org/packages/5c/ca/0c19ec16858585d37767b167fc9602593f98998a68a798450558239fb04a/sqlalchemy-2.0.41-cp312-cp312-win32.whl", hash = "sha256:a8808d5cf866c781150d36a3c8eb3adccfa41a8105d031bf27e92c251e3969d6", size = 2084514, upload-time = "2025-05-14T17:55:49.915Z" },
-    { url = "https://files.pythonhosted.org/packages/7f/23/4c2833d78ff3010a4e17f984c734f52b531a8c9060a50429c9d4b0211be6/sqlalchemy-2.0.41-cp312-cp312-win_amd64.whl", hash = "sha256:5b14e97886199c1f52c14629c11d90c11fbb09e9334fa7bb5f6d068d9ced0ce0", size = 2111557, upload-time = "2025-05-14T17:55:51.349Z" },
-    { url = "https://files.pythonhosted.org/packages/d3/ad/2e1c6d4f235a97eeef52d0200d8ddda16f6c4dd70ae5ad88c46963440480/sqlalchemy-2.0.41-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:4eeb195cdedaf17aab6b247894ff2734dcead6c08f748e617bfe05bd5a218443", size = 2115491, upload-time = "2025-05-14T17:55:31.177Z" },
-    { url = "https://files.pythonhosted.org/packages/cf/8d/be490e5db8400dacc89056f78a52d44b04fbf75e8439569d5b879623a53b/sqlalchemy-2.0.41-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:d4ae769b9c1c7757e4ccce94b0641bc203bbdf43ba7a2413ab2523d8d047d8dc", size = 2102827, upload-time = "2025-05-14T17:55:34.921Z" },
-    { url = "https://files.pythonhosted.org/packages/a0/72/c97ad430f0b0e78efaf2791342e13ffeafcbb3c06242f01a3bb8fe44f65d/sqlalchemy-2.0.41-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a62448526dd9ed3e3beedc93df9bb6b55a436ed1474db31a2af13b313a70a7e1", size = 3225224, upload-time = "2025-05-14T17:50:41.418Z" },
-    { url = "https://files.pythonhosted.org/packages/5e/51/5ba9ea3246ea068630acf35a6ba0d181e99f1af1afd17e159eac7e8bc2b8/sqlalchemy-2.0.41-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dc56c9788617b8964ad02e8fcfeed4001c1f8ba91a9e1f31483c0dffb207002a", size = 3230045, upload-time = "2025-05-14T17:51:54.722Z" },
-    { url = "https://files.pythonhosted.org/packages/78/2f/8c14443b2acea700c62f9b4a8bad9e49fc1b65cfb260edead71fd38e9f19/sqlalchemy-2.0.41-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:c153265408d18de4cc5ded1941dcd8315894572cddd3c58df5d5b5705b3fa28d", size = 3159357, upload-time = "2025-05-14T17:50:43.483Z" },
-    { url = "https://files.pythonhosted.org/packages/fc/b2/43eacbf6ccc5276d76cea18cb7c3d73e294d6fb21f9ff8b4eef9b42bbfd5/sqlalchemy-2.0.41-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:4f67766965996e63bb46cfbf2ce5355fc32d9dd3b8ad7e536a920ff9ee422e23", size = 3197511, upload-time = "2025-05-14T17:51:57.308Z" },
-    { url = "https://files.pythonhosted.org/packages/fa/2e/677c17c5d6a004c3c45334ab1dbe7b7deb834430b282b8a0f75ae220c8eb/sqlalchemy-2.0.41-cp313-cp313-win32.whl", hash = "sha256:bfc9064f6658a3d1cadeaa0ba07570b83ce6801a1314985bf98ec9b95d74e15f", size = 2082420, upload-time = "2025-05-14T17:55:52.69Z" },
-    { url = "https://files.pythonhosted.org/packages/e9/61/e8c1b9b6307c57157d328dd8b8348ddc4c47ffdf1279365a13b2b98b8049/sqlalchemy-2.0.41-cp313-cp313-win_amd64.whl", hash = "sha256:82ca366a844eb551daff9d2e6e7a9e5e76d2612c8564f58db6c19a726869c1df", size = 2108329, upload-time = "2025-05-14T17:55:54.495Z" },
-    { url = "https://files.pythonhosted.org/packages/1c/fc/9ba22f01b5cdacc8f5ed0d22304718d2c758fce3fd49a5372b886a86f37c/sqlalchemy-2.0.41-py3-none-any.whl", hash = "sha256:57df5dc6fdb5ed1a88a1ed2195fd31927e705cad62dedd86b46972752a80f576", size = 1911224, upload-time = "2025-05-14T17:39:42.154Z" },
+    { url = "https://files.pythonhosted.org/packages/62/c4/59c7c9b068e6813c898b771204aad36683c96318ed12d4233e1b18762164/sqlalchemy-2.0.44-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:72fea91746b5890f9e5e0997f16cbf3d53550580d76355ba2d998311b17b2250", size = 2139675, upload-time = "2025-10-10T16:03:31.064Z" },
+    { url = "https://files.pythonhosted.org/packages/d6/ae/eeb0920537a6f9c5a3708e4a5fc55af25900216bdb4847ec29cfddf3bf3a/sqlalchemy-2.0.44-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:585c0c852a891450edbb1eaca8648408a3cc125f18cf433941fa6babcc359e29", size = 2127726, upload-time = "2025-10-10T16:03:35.934Z" },
+    { url = "https://files.pythonhosted.org/packages/d8/d5/2ebbabe0379418eda8041c06b0b551f213576bfe4c2f09d77c06c07c8cc5/sqlalchemy-2.0.44-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9b94843a102efa9ac68a7a30cd46df3ff1ed9c658100d30a725d10d9c60a2f44", size = 3327603, upload-time = "2025-10-10T15:35:28.322Z" },
+    { url = "https://files.pythonhosted.org/packages/45/e5/5aa65852dadc24b7d8ae75b7efb8d19303ed6ac93482e60c44a585930ea5/sqlalchemy-2.0.44-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:119dc41e7a7defcefc57189cfa0e61b1bf9c228211aba432b53fb71ef367fda1", size = 3337842, upload-time = "2025-10-10T15:43:45.431Z" },
+    { url = "https://files.pythonhosted.org/packages/41/92/648f1afd3f20b71e880ca797a960f638d39d243e233a7082c93093c22378/sqlalchemy-2.0.44-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:0765e318ee9179b3718c4fd7ba35c434f4dd20332fbc6857a5e8df17719c24d7", size = 3264558, upload-time = "2025-10-10T15:35:29.93Z" },
+    { url = "https://files.pythonhosted.org/packages/40/cf/e27d7ee61a10f74b17740918e23cbc5bc62011b48282170dc4c66da8ec0f/sqlalchemy-2.0.44-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:2e7b5b079055e02d06a4308d0481658e4f06bc7ef211567edc8f7d5dce52018d", size = 3301570, upload-time = "2025-10-10T15:43:48.407Z" },
+    { url = "https://files.pythonhosted.org/packages/3b/3d/3116a9a7b63e780fb402799b6da227435be878b6846b192f076d2f838654/sqlalchemy-2.0.44-cp312-cp312-win32.whl", hash = "sha256:846541e58b9a81cce7dee8329f352c318de25aa2f2bbe1e31587eb1f057448b4", size = 2103447, upload-time = "2025-10-10T15:03:21.678Z" },
+    { url = "https://files.pythonhosted.org/packages/25/83/24690e9dfc241e6ab062df82cc0df7f4231c79ba98b273fa496fb3dd78ed/sqlalchemy-2.0.44-cp312-cp312-win_amd64.whl", hash = "sha256:7cbcb47fd66ab294703e1644f78971f6f2f1126424d2b300678f419aa73c7b6e", size = 2130912, upload-time = "2025-10-10T15:03:24.656Z" },
+    { url = "https://files.pythonhosted.org/packages/45/d3/c67077a2249fdb455246e6853166360054c331db4613cda3e31ab1cadbef/sqlalchemy-2.0.44-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:ff486e183d151e51b1d694c7aa1695747599bb00b9f5f604092b54b74c64a8e1", size = 2135479, upload-time = "2025-10-10T16:03:37.671Z" },
+    { url = "https://files.pythonhosted.org/packages/2b/91/eabd0688330d6fd114f5f12c4f89b0d02929f525e6bf7ff80aa17ca802af/sqlalchemy-2.0.44-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:0b1af8392eb27b372ddb783b317dea0f650241cea5bd29199b22235299ca2e45", size = 2123212, upload-time = "2025-10-10T16:03:41.755Z" },
+    { url = "https://files.pythonhosted.org/packages/b0/bb/43e246cfe0e81c018076a16036d9b548c4cc649de241fa27d8d9ca6f85ab/sqlalchemy-2.0.44-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2b61188657e3a2b9ac4e8f04d6cf8e51046e28175f79464c67f2fd35bceb0976", size = 3255353, upload-time = "2025-10-10T15:35:31.221Z" },
+    { url = "https://files.pythonhosted.org/packages/b9/96/c6105ed9a880abe346b64d3b6ddef269ddfcab04f7f3d90a0bf3c5a88e82/sqlalchemy-2.0.44-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b87e7b91a5d5973dda5f00cd61ef72ad75a1db73a386b62877d4875a8840959c", size = 3260222, upload-time = "2025-10-10T15:43:50.124Z" },
+    { url = "https://files.pythonhosted.org/packages/44/16/1857e35a47155b5ad927272fee81ae49d398959cb749edca6eaa399b582f/sqlalchemy-2.0.44-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:15f3326f7f0b2bfe406ee562e17f43f36e16167af99c4c0df61db668de20002d", size = 3189614, upload-time = "2025-10-10T15:35:32.578Z" },
+    { url = "https://files.pythonhosted.org/packages/88/ee/4afb39a8ee4fc786e2d716c20ab87b5b1fb33d4ac4129a1aaa574ae8a585/sqlalchemy-2.0.44-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:1e77faf6ff919aa8cd63f1c4e561cac1d9a454a191bb864d5dd5e545935e5a40", size = 3226248, upload-time = "2025-10-10T15:43:51.862Z" },
+    { url = "https://files.pythonhosted.org/packages/32/d5/0e66097fc64fa266f29a7963296b40a80d6a997b7ac13806183700676f86/sqlalchemy-2.0.44-cp313-cp313-win32.whl", hash = "sha256:ee51625c2d51f8baadf2829fae817ad0b66b140573939dd69284d2ba3553ae73", size = 2101275, upload-time = "2025-10-10T15:03:26.096Z" },
+    { url = "https://files.pythonhosted.org/packages/03/51/665617fe4f8c6450f42a6d8d69243f9420f5677395572c2fe9d21b493b7b/sqlalchemy-2.0.44-cp313-cp313-win_amd64.whl", hash = "sha256:c1c80faaee1a6c3428cecf40d16a2365bcf56c424c92c2b6f0f9ad204b899e9e", size = 2127901, upload-time = "2025-10-10T15:03:27.548Z" },
+    { url = "https://files.pythonhosted.org/packages/9c/5e/6a29fa884d9fb7ddadf6b69490a9d45fded3b38541713010dad16b77d015/sqlalchemy-2.0.44-py3-none-any.whl", hash = "sha256:19de7ca1246fbef9f9d1bff8f1ab25641569df226364a0e40457dc5457c54b05", size = 1928718, upload-time = "2025-10-10T15:29:45.32Z" },
 ]

 [package.optional-dependencies]
@@ -5230,7 +5231,7 @@ wheels = [

 [[package]]
 name = "weaviate-client"
-version = "4.16.9"
+version = "4.17.0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "authlib" },
@@ -5241,9 +5242,9 @@ dependencies = [
     { name = "pydantic" },
     { name = "validators" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/f4/e4/6a0b1501645f17a851067fc7bd0d5b53dc9777f2818be9c43debe06eda19/weaviate_client-4.16.9.tar.gz", hash = "sha256:d461071f1ff5ebddd0fc697959628a1d8caa12af1da071401ef25583c3084eba", size = 766390, upload-time = "2025-08-20T15:00:03.924Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/bd/0e/e4582b007427187a9fde55fa575db4b766c81929d2b43a3dd8becce50567/weaviate_client-4.17.0.tar.gz", hash = "sha256:731d58d84b0989df4db399b686357ed285fb95971a492ccca8dec90bb2343c51", size = 769019, upload-time = "2025-09-26T11:20:27.381Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/10/1a/fc66f5f33961351c759d56453d18176849da8f64186c941183bb574b808b/weaviate_client-4.16.9-py3-none-any.whl", hash = "sha256:8b4adabaec0d513edef94c8c1de61c89a86eba3b63a4dc1acdfc9580e80199f4", size = 579098, upload-time = "2025-08-20T15:00:01.882Z" },
+    { url = "https://files.pythonhosted.org/packages/5b/c5/2da3a45866da7a935dab8ad07be05dcaee48b3ad4955144583b651929be7/weaviate_client-4.17.0-py3-none-any.whl", hash = "sha256:60e4a355b90537ee1e942ab0b76a94750897a13d9cf13c5a6decbd166d0ca8b5", size = 582763, upload-time = "2025-09-26T11:20:25.864Z" },
 ]

 [[package]]