diff --git a/.github/actions/run-and-record-tests/action.yml b/.github/actions/run-and-record-tests/action.yml index 3929df09c..ac600d570 100644 --- a/.github/actions/run-and-record-tests/action.yml +++ b/.github/actions/run-and-record-tests/action.yml @@ -86,10 +86,9 @@ runs: if: ${{ always() }} shell: bash run: | - sudo docker logs ollama > ollama-${{ inputs.inference-mode }}.log || true - distro_name=$(echo "${{ inputs.stack-config }}" | sed 's/^docker://' | sed 's/^server://') - stack_container_name="llama-stack-test-$distro_name" - sudo docker logs $stack_container_name > docker-${distro_name}-${{ inputs.inference-mode }}.log || true + # Ollama logs (if ollama container exists) + sudo docker logs ollama > ollama-${{ inputs.inference-mode }}.log 2>&1 || true + # Note: distro container logs are now dumped in integration-tests.sh before container is removed - name: Upload logs if: ${{ always() }} diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml index b5845be53..0fdd50acc 100644 --- a/.github/workflows/pre-commit.yml +++ b/.github/workflows/pre-commit.yml @@ -37,7 +37,7 @@ jobs: .pre-commit-config.yaml - name: Set up Node.js - uses: actions/setup-node@a0853c24544627f65ddf259abe73b1d18a591444 # v5.0.0 + uses: actions/setup-node@2028fbc5c25fe9cf00d9f06a71cc4710d4507903 # v6.0.0 with: node-version: '20' cache: 'npm' diff --git a/.github/workflows/precommit-trigger.yml b/.github/workflows/precommit-trigger.yml index 0c23b57de..b05898d29 100644 --- a/.github/workflows/precommit-trigger.yml +++ b/.github/workflows/precommit-trigger.yml @@ -99,7 +99,7 @@ jobs: owner: context.repo.owner, repo: context.repo.repo, issue_number: ${{ steps.check_author.outputs.pr_number }}, - body: `⏳ Running pre-commit hooks on PR #${{ steps.check_author.outputs.pr_number }}...` + body: `⏳ Running [pre-commit hooks](https://github.com/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}) on PR #${{ steps.check_author.outputs.pr_number }}...` }); - name: Checkout PR branch (same-repo) @@ -141,7 +141,7 @@ jobs: - name: Set up Node.js if: steps.check_author.outputs.authorized == 'true' - uses: actions/setup-node@a0853c24544627f65ddf259abe73b1d18a591444 # v5.0.0 + uses: actions/setup-node@2028fbc5c25fe9cf00d9f06a71cc4710d4507903 # v6.0.0 with: node-version: '20' cache: 'npm' diff --git a/.github/workflows/providers-list-deps.yml b/.github/workflows/providers-list-deps.yml index df491b680..e30e1e5fb 100644 --- a/.github/workflows/providers-list-deps.yml +++ b/.github/workflows/providers-list-deps.yml @@ -36,7 +36,7 @@ jobs: distros: ${{ steps.set-matrix.outputs.distros }} steps: - name: Checkout repository - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 - name: Generate Distribution List id: set-matrix @@ -55,7 +55,7 @@ jobs: steps: - name: Checkout repository - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 - name: Install dependencies uses: ./.github/actions/setup-runner @@ -79,7 +79,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout repository - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 - name: Install dependencies uses: ./.github/actions/setup-runner @@ -92,7 +92,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout repository - uses: 
actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 - name: Install dependencies uses: ./.github/actions/setup-runner diff --git a/.github/workflows/python-build-test.yml b/.github/workflows/python-build-test.yml index dfa844175..96243285f 100644 --- a/.github/workflows/python-build-test.yml +++ b/.github/workflows/python-build-test.yml @@ -24,7 +24,7 @@ jobs: uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 - name: Install uv - uses: astral-sh/setup-uv@eb1897b8dc4b5d5bfe39a428a8f2304605e0983c # v7.0.0 + uses: astral-sh/setup-uv@3259c6206f993105e3a61b142c2d97bf4b9ef83d # v7.1.0 with: python-version: ${{ matrix.python-version }} activate-environment: true diff --git a/.github/workflows/ui-unit-tests.yml b/.github/workflows/ui-unit-tests.yml index c16f512d1..e8f318b8e 100644 --- a/.github/workflows/ui-unit-tests.yml +++ b/.github/workflows/ui-unit-tests.yml @@ -29,7 +29,7 @@ jobs: uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 - name: Setup Node.js - uses: actions/setup-node@a0853c24544627f65ddf259abe73b1d18a591444 # v5.0.0 + uses: actions/setup-node@2028fbc5c25fe9cf00d9f06a71cc4710d4507903 # v6.0.0 with: node-version: ${{ matrix.node-version }} cache: 'npm' diff --git a/client-sdks/stainless/openapi.stainless.yml b/client-sdks/stainless/openapi.stainless.yml index 0a5dfc044..9461be996 100644 --- a/client-sdks/stainless/openapi.stainless.yml +++ b/client-sdks/stainless/openapi.stainless.yml @@ -208,19 +208,6 @@ resources: type: http endpoint: post /v1/conversations/{conversation_id}/items - datasets: - models: - list_datasets_response: ListDatasetsResponse - methods: - register: post /v1beta/datasets - retrieve: get /v1beta/datasets/{dataset_id} - list: - endpoint: get /v1beta/datasets - paginated: false - unregister: delete /v1beta/datasets/{dataset_id} - iterrows: get /v1beta/datasetio/iterrows/{dataset_id} - appendrows: post /v1beta/datasetio/append-rows/{dataset_id} - inspect: models: healthInfo: HealthInfo @@ -521,6 +508,21 @@ resources: stream_event_model: alpha.agents.turn.agent_turn_response_stream_chunk param_discriminator: stream + beta: + subresources: + datasets: + models: + list_datasets_response: ListDatasetsResponse + methods: + register: post /v1beta/datasets + retrieve: get /v1beta/datasets/{dataset_id} + list: + endpoint: get /v1beta/datasets + paginated: false + unregister: delete /v1beta/datasets/{dataset_id} + iterrows: get /v1beta/datasetio/iterrows/{dataset_id} + appendrows: post /v1beta/datasetio/append-rows/{dataset_id} + settings: license: MIT diff --git a/client-sdks/stainless/openapi.yml b/client-sdks/stainless/openapi.yml index eff01931f..98a309f12 100644 --- a/client-sdks/stainless/openapi.yml +++ b/client-sdks/stainless/openapi.yml @@ -2039,69 +2039,6 @@ paths: schema: $ref: '#/components/schemas/URL' deprecated: false - /v1/tool-runtime/rag-tool/insert: - post: - responses: - '200': - description: OK - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - ToolRuntime - summary: >- - Index documents so they can be used by the RAG system. - description: >- - Index documents so they can be used by the RAG system. 
- parameters: [] - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/InsertRequest' - required: true - deprecated: false - /v1/tool-runtime/rag-tool/query: - post: - responses: - '200': - description: >- - RAGQueryResult containing the retrieved content and metadata - content: - application/json: - schema: - $ref: '#/components/schemas/RAGQueryResult' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - ToolRuntime - summary: >- - Query the RAG system for context; typically invoked by the agent. - description: >- - Query the RAG system for context; typically invoked by the agent. - parameters: [] - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/QueryRequest' - required: true - deprecated: false /v1/toolgroups: get: responses: @@ -6440,7 +6377,7 @@ components: enum: - model - shield - - vector_db + - vector_store - dataset - scoring_function - benchmark @@ -9132,7 +9069,7 @@ components: enum: - model - shield - - vector_db + - vector_store - dataset - scoring_function - benchmark @@ -9440,7 +9377,7 @@ components: enum: - model - shield - - vector_db + - vector_store - dataset - scoring_function - benchmark @@ -9921,274 +9858,6 @@ components: title: ListToolDefsResponse description: >- Response containing a list of tool definitions. - RAGDocument: - type: object - properties: - document_id: - type: string - description: The unique identifier for the document. - content: - oneOf: - - type: string - - $ref: '#/components/schemas/InterleavedContentItem' - - type: array - items: - $ref: '#/components/schemas/InterleavedContentItem' - - $ref: '#/components/schemas/URL' - description: The content of the document. - mime_type: - type: string - description: The MIME type of the document. - metadata: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: Additional metadata for the document. - additionalProperties: false - required: - - document_id - - content - - metadata - title: RAGDocument - description: >- - A document to be used for document ingestion in the RAG Tool. - InsertRequest: - type: object - properties: - documents: - type: array - items: - $ref: '#/components/schemas/RAGDocument' - description: >- - List of documents to index in the RAG system - vector_db_id: - type: string - description: >- - ID of the vector database to store the document embeddings - chunk_size_in_tokens: - type: integer - description: >- - (Optional) Size in tokens for document chunking during indexing - additionalProperties: false - required: - - documents - - vector_db_id - - chunk_size_in_tokens - title: InsertRequest - DefaultRAGQueryGeneratorConfig: - type: object - properties: - type: - type: string - const: default - default: default - description: >- - Type of query generator, always 'default' - separator: - type: string - default: ' ' - description: >- - String separator used to join query terms - additionalProperties: false - required: - - type - - separator - title: DefaultRAGQueryGeneratorConfig - description: >- - Configuration for the default RAG query generator. 
- LLMRAGQueryGeneratorConfig: - type: object - properties: - type: - type: string - const: llm - default: llm - description: Type of query generator, always 'llm' - model: - type: string - description: >- - Name of the language model to use for query generation - template: - type: string - description: >- - Template string for formatting the query generation prompt - additionalProperties: false - required: - - type - - model - - template - title: LLMRAGQueryGeneratorConfig - description: >- - Configuration for the LLM-based RAG query generator. - RAGQueryConfig: - type: object - properties: - query_generator_config: - oneOf: - - $ref: '#/components/schemas/DefaultRAGQueryGeneratorConfig' - - $ref: '#/components/schemas/LLMRAGQueryGeneratorConfig' - discriminator: - propertyName: type - mapping: - default: '#/components/schemas/DefaultRAGQueryGeneratorConfig' - llm: '#/components/schemas/LLMRAGQueryGeneratorConfig' - description: Configuration for the query generator. - max_tokens_in_context: - type: integer - default: 4096 - description: Maximum number of tokens in the context. - max_chunks: - type: integer - default: 5 - description: Maximum number of chunks to retrieve. - chunk_template: - type: string - default: > - Result {index} - - Content: {chunk.content} - - Metadata: {metadata} - description: >- - Template for formatting each retrieved chunk in the context. Available - placeholders: {index} (1-based chunk ordinal), {chunk.content} (chunk - content string), {metadata} (chunk metadata dict). Default: "Result {index}\nContent: - {chunk.content}\nMetadata: {metadata}\n" - mode: - $ref: '#/components/schemas/RAGSearchMode' - default: vector - description: >- - Search mode for retrieval—either "vector", "keyword", or "hybrid". Default - "vector". - ranker: - $ref: '#/components/schemas/Ranker' - description: >- - Configuration for the ranker to use in hybrid search. Defaults to RRF - ranker. - additionalProperties: false - required: - - query_generator_config - - max_tokens_in_context - - max_chunks - - chunk_template - title: RAGQueryConfig - description: >- - Configuration for the RAG query generation. - RAGSearchMode: - type: string - enum: - - vector - - keyword - - hybrid - title: RAGSearchMode - description: >- - Search modes for RAG query retrieval: - VECTOR: Uses vector similarity search - for semantic matching - KEYWORD: Uses keyword-based search for exact matching - - HYBRID: Combines both vector and keyword search for better results - RRFRanker: - type: object - properties: - type: - type: string - const: rrf - default: rrf - description: The type of ranker, always "rrf" - impact_factor: - type: number - default: 60.0 - description: >- - The impact factor for RRF scoring. Higher values give more weight to higher-ranked - results. Must be greater than 0 - additionalProperties: false - required: - - type - - impact_factor - title: RRFRanker - description: >- - Reciprocal Rank Fusion (RRF) ranker configuration. - Ranker: - oneOf: - - $ref: '#/components/schemas/RRFRanker' - - $ref: '#/components/schemas/WeightedRanker' - discriminator: - propertyName: type - mapping: - rrf: '#/components/schemas/RRFRanker' - weighted: '#/components/schemas/WeightedRanker' - WeightedRanker: - type: object - properties: - type: - type: string - const: weighted - default: weighted - description: The type of ranker, always "weighted" - alpha: - type: number - default: 0.5 - description: >- - Weight factor between 0 and 1. 
0 means only use keyword scores, 1 means - only use vector scores, values in between blend both scores. - additionalProperties: false - required: - - type - - alpha - title: WeightedRanker - description: >- - Weighted ranker configuration that combines vector and keyword scores. - QueryRequest: - type: object - properties: - content: - $ref: '#/components/schemas/InterleavedContent' - description: >- - The query content to search for in the indexed documents - vector_db_ids: - type: array - items: - type: string - description: >- - List of vector database IDs to search within - query_config: - $ref: '#/components/schemas/RAGQueryConfig' - description: >- - (Optional) Configuration parameters for the query operation - additionalProperties: false - required: - - content - - vector_db_ids - title: QueryRequest - RAGQueryResult: - type: object - properties: - content: - $ref: '#/components/schemas/InterleavedContent' - description: >- - (Optional) The retrieved content from the query - metadata: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - Additional metadata about the query result - additionalProperties: false - required: - - metadata - title: RAGQueryResult - description: >- - Result of a RAG query containing retrieved content and metadata. ToolGroup: type: object properties: @@ -10203,7 +9872,7 @@ components: enum: - model - shield - - vector_db + - vector_store - dataset - scoring_function - benchmark @@ -11325,7 +10994,7 @@ components: enum: - model - shield - - vector_db + - vector_store - dataset - scoring_function - benchmark @@ -12652,7 +12321,7 @@ components: enum: - model - shield - - vector_db + - vector_store - dataset - scoring_function - benchmark diff --git a/docs/docs/distributions/self_hosted_distro/meta-reference-gpu.md b/docs/docs/distributions/self_hosted_distro/meta-reference-gpu.md index b7134b3e1..666850976 100644 --- a/docs/docs/distributions/self_hosted_distro/meta-reference-gpu.md +++ b/docs/docs/distributions/self_hosted_distro/meta-reference-gpu.md @@ -21,7 +21,7 @@ The `llamastack/distribution-meta-reference-gpu` distribution consists of the fo | inference | `inline::meta-reference` | | safety | `inline::llama-guard` | | scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` | -| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::rag-runtime`, `remote::model-context-protocol` | +| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `remote::model-context-protocol` | | vector_io | `inline::faiss`, `remote::chromadb`, `remote::pgvector` | diff --git a/docs/docs/distributions/self_hosted_distro/nvidia.md b/docs/docs/distributions/self_hosted_distro/nvidia.md index 4a7d99ff5..b1de9ddb8 100644 --- a/docs/docs/distributions/self_hosted_distro/nvidia.md +++ b/docs/docs/distributions/self_hosted_distro/nvidia.md @@ -16,7 +16,7 @@ The `llamastack/distribution-nvidia` distribution consists of the following prov | post_training | `remote::nvidia` | | safety | `remote::nvidia` | | scoring | `inline::basic` | -| tool_runtime | `inline::rag-runtime` | +| tool_runtime | | | vector_io | `inline::faiss` | diff --git a/docs/docs/providers/vector_io/inline_sqlite-vec.mdx b/docs/docs/providers/vector_io/inline_sqlite-vec.mdx index 98a372250..459498a59 100644 --- a/docs/docs/providers/vector_io/inline_sqlite-vec.mdx +++ b/docs/docs/providers/vector_io/inline_sqlite-vec.mdx @@ -28,7 +28,7 @@ description: | #### 
Empirical Example Consider the histogram below in which 10,000 randomly generated strings were inserted - in batches of 100 into both Faiss and sqlite-vec using `client.tool_runtime.rag_tool.insert()`. + in batches of 100 into both Faiss and sqlite-vec. ```{image} ../../../../_static/providers/vector_io/write_time_comparison_sqlite-vec-faiss.png :alt: Comparison of SQLite-Vec and Faiss write times @@ -233,7 +233,7 @@ Datasets that can fit in memory, frequent reads | Faiss | Optimized for speed, i #### Empirical Example Consider the histogram below in which 10,000 randomly generated strings were inserted -in batches of 100 into both Faiss and sqlite-vec using `client.tool_runtime.rag_tool.insert()`. +in batches of 100 into both Faiss and sqlite-vec. ```{image} ../../../../_static/providers/vector_io/write_time_comparison_sqlite-vec-faiss.png :alt: Comparison of SQLite-Vec and Faiss write times diff --git a/docs/docs/references/llama_stack_client_cli_reference.md b/docs/docs/references/llama_stack_client_cli_reference.md index 9bb514a2d..a4321938a 100644 --- a/docs/docs/references/llama_stack_client_cli_reference.md +++ b/docs/docs/references/llama_stack_client_cli_reference.md @@ -32,7 +32,6 @@ Commands: scoring_functions Manage scoring functions. shields Manage safety shield services. toolgroups Manage available tool groups. - vector_dbs Manage vector databases. ``` ### `llama-stack-client configure` @@ -211,53 +210,6 @@ Unregister a model from distribution endpoint llama-stack-client models unregister ``` -## Vector DB Management -Manage vector databases. - - -### `llama-stack-client vector_dbs list` -Show available vector dbs on distribution endpoint -```bash -llama-stack-client vector_dbs list -``` -``` -┏━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ -┃ identifier ┃ provider_id ┃ provider_resource_id ┃ vector_db_type ┃ params ┃ -┡━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ -│ my_demo_vector_db │ faiss │ my_demo_vector_db │ │ embedding_dimension: 768 │ -│ │ │ │ │ embedding_model: nomic-embed-text-v1.5 │ -│ │ │ │ │ type: vector_db │ -│ │ │ │ │ │ -└──────────────────────────┴─────────────┴──────────────────────────┴────────────────┴───────────────────────────────────┘ -``` - -### `llama-stack-client vector_dbs register` -Create a new vector db -```bash -llama-stack-client vector_dbs register [--provider-id ] [--provider-vector-db-id ] [--embedding-model ] [--embedding-dimension ] -``` - - -Required arguments: -- `VECTOR_DB_ID`: Vector DB ID - -Optional arguments: -- `--provider-id`: Provider ID for the vector db -- `--provider-vector-db-id`: Provider's vector db ID -- `--embedding-model`: Embedding model to use. Default: `nomic-embed-text-v1.5` -- `--embedding-dimension`: Dimension of embeddings. Default: 768 - -### `llama-stack-client vector_dbs unregister` -Delete a vector db -```bash -llama-stack-client vector_dbs unregister -``` - - -Required arguments: -- `VECTOR_DB_ID`: Vector DB ID - - ## Shield Management Manage safety shield services. 
### `llama-stack-client shields list` diff --git a/docs/openapi_generator/pyopenapi/operations.py b/docs/openapi_generator/pyopenapi/operations.py index 2970d7e53..e5f33f13d 100644 --- a/docs/openapi_generator/pyopenapi/operations.py +++ b/docs/openapi_generator/pyopenapi/operations.py @@ -196,16 +196,10 @@ def _get_endpoint_functions( def _get_defining_class(member_fn: str, derived_cls: type) -> type: "Find the class in which a member function is first defined in a class inheritance hierarchy." - # This import must be dynamic here - from llama_stack.apis.tools import RAGToolRuntime, ToolRuntime - # iterate in reverse member resolution order to find most specific class first for cls in reversed(inspect.getmro(derived_cls)): for name, _ in inspect.getmembers(cls, inspect.isfunction): if name == member_fn: - # HACK ALERT - if cls == RAGToolRuntime: - return ToolRuntime return cls raise ValidationError( diff --git a/docs/static/deprecated-llama-stack-spec.html b/docs/static/deprecated-llama-stack-spec.html index 98ed50c4f..d920317cf 100644 --- a/docs/static/deprecated-llama-stack-spec.html +++ b/docs/static/deprecated-llama-stack-spec.html @@ -5547,7 +5547,7 @@ "enum": [ "model", "shield", - "vector_db", + "vector_store", "dataset", "scoring_function", "benchmark", @@ -5798,7 +5798,7 @@ "enum": [ "model", "shield", - "vector_db", + "vector_store", "dataset", "scoring_function", "benchmark", diff --git a/docs/static/deprecated-llama-stack-spec.yaml b/docs/static/deprecated-llama-stack-spec.yaml index 99c8dd03e..66b2caeca 100644 --- a/docs/static/deprecated-llama-stack-spec.yaml +++ b/docs/static/deprecated-llama-stack-spec.yaml @@ -4114,7 +4114,7 @@ components: enum: - model - shield - - vector_db + - vector_store - dataset - scoring_function - benchmark @@ -4303,7 +4303,7 @@ components: enum: - model - shield - - vector_db + - vector_store - dataset - scoring_function - benchmark diff --git a/docs/static/experimental-llama-stack-spec.html b/docs/static/experimental-llama-stack-spec.html index 7d572f89f..ab474180e 100644 --- a/docs/static/experimental-llama-stack-spec.html +++ b/docs/static/experimental-llama-stack-spec.html @@ -1850,7 +1850,7 @@ "enum": [ "model", "shield", - "vector_db", + "vector_store", "dataset", "scoring_function", "benchmark", @@ -3983,7 +3983,7 @@ "enum": [ "model", "shield", - "vector_db", + "vector_store", "dataset", "scoring_function", "benchmark", diff --git a/docs/static/experimental-llama-stack-spec.yaml b/docs/static/experimental-llama-stack-spec.yaml index fee20814c..dd9e43cc5 100644 --- a/docs/static/experimental-llama-stack-spec.yaml +++ b/docs/static/experimental-llama-stack-spec.yaml @@ -1320,7 +1320,7 @@ components: enum: - model - shield - - vector_db + - vector_store - dataset - scoring_function - benchmark @@ -2927,7 +2927,7 @@ components: enum: - model - shield - - vector_db + - vector_store - dataset - scoring_function - benchmark diff --git a/docs/static/llama-stack-spec.html b/docs/static/llama-stack-spec.html index 1091a1cb6..7dfb2ed13 100644 --- a/docs/static/llama-stack-spec.html +++ b/docs/static/llama-stack-spec.html @@ -2624,89 +2624,6 @@ "deprecated": false } }, - "/v1/tool-runtime/rag-tool/insert": { - "post": { - "responses": { - "200": { - "description": "OK" - }, - "400": { - "$ref": "#/components/responses/BadRequest400" - }, - "429": { - "$ref": "#/components/responses/TooManyRequests429" - }, - "500": { - "$ref": "#/components/responses/InternalServerError500" - }, - "default": { - "$ref": "#/components/responses/DefaultError" - } - }, 
- "tags": [ - "ToolRuntime" - ], - "summary": "Index documents so they can be used by the RAG system.", - "description": "Index documents so they can be used by the RAG system.", - "parameters": [], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/InsertRequest" - } - } - }, - "required": true - }, - "deprecated": false - } - }, - "/v1/tool-runtime/rag-tool/query": { - "post": { - "responses": { - "200": { - "description": "RAGQueryResult containing the retrieved content and metadata", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/RAGQueryResult" - } - } - } - }, - "400": { - "$ref": "#/components/responses/BadRequest400" - }, - "429": { - "$ref": "#/components/responses/TooManyRequests429" - }, - "500": { - "$ref": "#/components/responses/InternalServerError500" - }, - "default": { - "$ref": "#/components/responses/DefaultError" - } - }, - "tags": [ - "ToolRuntime" - ], - "summary": "Query the RAG system for context; typically invoked by the agent.", - "description": "Query the RAG system for context; typically invoked by the agent.", - "parameters": [], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/QueryRequest" - } - } - }, - "required": true - }, - "deprecated": false - } - }, "/v1/toolgroups": { "get": { "responses": { @@ -6800,7 +6717,7 @@ "enum": [ "model", "shield", - "vector_db", + "vector_store", "dataset", "scoring_function", "benchmark", @@ -10205,7 +10122,7 @@ "enum": [ "model", "shield", - "vector_db", + "vector_store", "dataset", "scoring_function", "benchmark", @@ -10687,7 +10604,7 @@ "enum": [ "model", "shield", - "vector_db", + "vector_store", "dataset", "scoring_function", "benchmark", @@ -11383,346 +11300,6 @@ "title": "ListToolDefsResponse", "description": "Response containing a list of tool definitions." }, - "RAGDocument": { - "type": "object", - "properties": { - "document_id": { - "type": "string", - "description": "The unique identifier for the document." - }, - "content": { - "oneOf": [ - { - "type": "string" - }, - { - "$ref": "#/components/schemas/InterleavedContentItem" - }, - { - "type": "array", - "items": { - "$ref": "#/components/schemas/InterleavedContentItem" - } - }, - { - "$ref": "#/components/schemas/URL" - } - ], - "description": "The content of the document." - }, - "mime_type": { - "type": "string", - "description": "The MIME type of the document." - }, - "metadata": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - }, - "description": "Additional metadata for the document." - } - }, - "additionalProperties": false, - "required": [ - "document_id", - "content", - "metadata" - ], - "title": "RAGDocument", - "description": "A document to be used for document ingestion in the RAG Tool." 
- }, - "InsertRequest": { - "type": "object", - "properties": { - "documents": { - "type": "array", - "items": { - "$ref": "#/components/schemas/RAGDocument" - }, - "description": "List of documents to index in the RAG system" - }, - "vector_db_id": { - "type": "string", - "description": "ID of the vector database to store the document embeddings" - }, - "chunk_size_in_tokens": { - "type": "integer", - "description": "(Optional) Size in tokens for document chunking during indexing" - } - }, - "additionalProperties": false, - "required": [ - "documents", - "vector_db_id", - "chunk_size_in_tokens" - ], - "title": "InsertRequest" - }, - "DefaultRAGQueryGeneratorConfig": { - "type": "object", - "properties": { - "type": { - "type": "string", - "const": "default", - "default": "default", - "description": "Type of query generator, always 'default'" - }, - "separator": { - "type": "string", - "default": " ", - "description": "String separator used to join query terms" - } - }, - "additionalProperties": false, - "required": [ - "type", - "separator" - ], - "title": "DefaultRAGQueryGeneratorConfig", - "description": "Configuration for the default RAG query generator." - }, - "LLMRAGQueryGeneratorConfig": { - "type": "object", - "properties": { - "type": { - "type": "string", - "const": "llm", - "default": "llm", - "description": "Type of query generator, always 'llm'" - }, - "model": { - "type": "string", - "description": "Name of the language model to use for query generation" - }, - "template": { - "type": "string", - "description": "Template string for formatting the query generation prompt" - } - }, - "additionalProperties": false, - "required": [ - "type", - "model", - "template" - ], - "title": "LLMRAGQueryGeneratorConfig", - "description": "Configuration for the LLM-based RAG query generator." - }, - "RAGQueryConfig": { - "type": "object", - "properties": { - "query_generator_config": { - "oneOf": [ - { - "$ref": "#/components/schemas/DefaultRAGQueryGeneratorConfig" - }, - { - "$ref": "#/components/schemas/LLMRAGQueryGeneratorConfig" - } - ], - "discriminator": { - "propertyName": "type", - "mapping": { - "default": "#/components/schemas/DefaultRAGQueryGeneratorConfig", - "llm": "#/components/schemas/LLMRAGQueryGeneratorConfig" - } - }, - "description": "Configuration for the query generator." - }, - "max_tokens_in_context": { - "type": "integer", - "default": 4096, - "description": "Maximum number of tokens in the context." - }, - "max_chunks": { - "type": "integer", - "default": 5, - "description": "Maximum number of chunks to retrieve." - }, - "chunk_template": { - "type": "string", - "default": "Result {index}\nContent: {chunk.content}\nMetadata: {metadata}\n", - "description": "Template for formatting each retrieved chunk in the context. Available placeholders: {index} (1-based chunk ordinal), {chunk.content} (chunk content string), {metadata} (chunk metadata dict). Default: \"Result {index}\\nContent: {chunk.content}\\nMetadata: {metadata}\\n\"" - }, - "mode": { - "$ref": "#/components/schemas/RAGSearchMode", - "default": "vector", - "description": "Search mode for retrieval—either \"vector\", \"keyword\", or \"hybrid\". Default \"vector\"." - }, - "ranker": { - "$ref": "#/components/schemas/Ranker", - "description": "Configuration for the ranker to use in hybrid search. Defaults to RRF ranker." 
- } - }, - "additionalProperties": false, - "required": [ - "query_generator_config", - "max_tokens_in_context", - "max_chunks", - "chunk_template" - ], - "title": "RAGQueryConfig", - "description": "Configuration for the RAG query generation." - }, - "RAGSearchMode": { - "type": "string", - "enum": [ - "vector", - "keyword", - "hybrid" - ], - "title": "RAGSearchMode", - "description": "Search modes for RAG query retrieval: - VECTOR: Uses vector similarity search for semantic matching - KEYWORD: Uses keyword-based search for exact matching - HYBRID: Combines both vector and keyword search for better results" - }, - "RRFRanker": { - "type": "object", - "properties": { - "type": { - "type": "string", - "const": "rrf", - "default": "rrf", - "description": "The type of ranker, always \"rrf\"" - }, - "impact_factor": { - "type": "number", - "default": 60.0, - "description": "The impact factor for RRF scoring. Higher values give more weight to higher-ranked results. Must be greater than 0" - } - }, - "additionalProperties": false, - "required": [ - "type", - "impact_factor" - ], - "title": "RRFRanker", - "description": "Reciprocal Rank Fusion (RRF) ranker configuration." - }, - "Ranker": { - "oneOf": [ - { - "$ref": "#/components/schemas/RRFRanker" - }, - { - "$ref": "#/components/schemas/WeightedRanker" - } - ], - "discriminator": { - "propertyName": "type", - "mapping": { - "rrf": "#/components/schemas/RRFRanker", - "weighted": "#/components/schemas/WeightedRanker" - } - } - }, - "WeightedRanker": { - "type": "object", - "properties": { - "type": { - "type": "string", - "const": "weighted", - "default": "weighted", - "description": "The type of ranker, always \"weighted\"" - }, - "alpha": { - "type": "number", - "default": 0.5, - "description": "Weight factor between 0 and 1. 0 means only use keyword scores, 1 means only use vector scores, values in between blend both scores." - } - }, - "additionalProperties": false, - "required": [ - "type", - "alpha" - ], - "title": "WeightedRanker", - "description": "Weighted ranker configuration that combines vector and keyword scores." - }, - "QueryRequest": { - "type": "object", - "properties": { - "content": { - "$ref": "#/components/schemas/InterleavedContent", - "description": "The query content to search for in the indexed documents" - }, - "vector_db_ids": { - "type": "array", - "items": { - "type": "string" - }, - "description": "List of vector database IDs to search within" - }, - "query_config": { - "$ref": "#/components/schemas/RAGQueryConfig", - "description": "(Optional) Configuration parameters for the query operation" - } - }, - "additionalProperties": false, - "required": [ - "content", - "vector_db_ids" - ], - "title": "QueryRequest" - }, - "RAGQueryResult": { - "type": "object", - "properties": { - "content": { - "$ref": "#/components/schemas/InterleavedContent", - "description": "(Optional) The retrieved content from the query" - }, - "metadata": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - }, - "description": "Additional metadata about the query result" - } - }, - "additionalProperties": false, - "required": [ - "metadata" - ], - "title": "RAGQueryResult", - "description": "Result of a RAG query containing retrieved content and metadata." 
- }, "ToolGroup": { "type": "object", "properties": { @@ -11740,7 +11317,7 @@ "enum": [ "model", "shield", - "vector_db", + "vector_store", "dataset", "scoring_function", "benchmark", diff --git a/docs/static/llama-stack-spec.yaml b/docs/static/llama-stack-spec.yaml index 6c3702374..1b0fefe55 100644 --- a/docs/static/llama-stack-spec.yaml +++ b/docs/static/llama-stack-spec.yaml @@ -2036,69 +2036,6 @@ paths: schema: $ref: '#/components/schemas/URL' deprecated: false - /v1/tool-runtime/rag-tool/insert: - post: - responses: - '200': - description: OK - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - ToolRuntime - summary: >- - Index documents so they can be used by the RAG system. - description: >- - Index documents so they can be used by the RAG system. - parameters: [] - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/InsertRequest' - required: true - deprecated: false - /v1/tool-runtime/rag-tool/query: - post: - responses: - '200': - description: >- - RAGQueryResult containing the retrieved content and metadata - content: - application/json: - schema: - $ref: '#/components/schemas/RAGQueryResult' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - ToolRuntime - summary: >- - Query the RAG system for context; typically invoked by the agent. - description: >- - Query the RAG system for context; typically invoked by the agent. - parameters: [] - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/QueryRequest' - required: true - deprecated: false /v1/toolgroups: get: responses: @@ -5227,7 +5164,7 @@ components: enum: - model - shield - - vector_db + - vector_store - dataset - scoring_function - benchmark @@ -7919,7 +7856,7 @@ components: enum: - model - shield - - vector_db + - vector_store - dataset - scoring_function - benchmark @@ -8227,7 +8164,7 @@ components: enum: - model - shield - - vector_db + - vector_store - dataset - scoring_function - benchmark @@ -8708,274 +8645,6 @@ components: title: ListToolDefsResponse description: >- Response containing a list of tool definitions. - RAGDocument: - type: object - properties: - document_id: - type: string - description: The unique identifier for the document. - content: - oneOf: - - type: string - - $ref: '#/components/schemas/InterleavedContentItem' - - type: array - items: - $ref: '#/components/schemas/InterleavedContentItem' - - $ref: '#/components/schemas/URL' - description: The content of the document. - mime_type: - type: string - description: The MIME type of the document. - metadata: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: Additional metadata for the document. - additionalProperties: false - required: - - document_id - - content - - metadata - title: RAGDocument - description: >- - A document to be used for document ingestion in the RAG Tool. 
- InsertRequest: - type: object - properties: - documents: - type: array - items: - $ref: '#/components/schemas/RAGDocument' - description: >- - List of documents to index in the RAG system - vector_db_id: - type: string - description: >- - ID of the vector database to store the document embeddings - chunk_size_in_tokens: - type: integer - description: >- - (Optional) Size in tokens for document chunking during indexing - additionalProperties: false - required: - - documents - - vector_db_id - - chunk_size_in_tokens - title: InsertRequest - DefaultRAGQueryGeneratorConfig: - type: object - properties: - type: - type: string - const: default - default: default - description: >- - Type of query generator, always 'default' - separator: - type: string - default: ' ' - description: >- - String separator used to join query terms - additionalProperties: false - required: - - type - - separator - title: DefaultRAGQueryGeneratorConfig - description: >- - Configuration for the default RAG query generator. - LLMRAGQueryGeneratorConfig: - type: object - properties: - type: - type: string - const: llm - default: llm - description: Type of query generator, always 'llm' - model: - type: string - description: >- - Name of the language model to use for query generation - template: - type: string - description: >- - Template string for formatting the query generation prompt - additionalProperties: false - required: - - type - - model - - template - title: LLMRAGQueryGeneratorConfig - description: >- - Configuration for the LLM-based RAG query generator. - RAGQueryConfig: - type: object - properties: - query_generator_config: - oneOf: - - $ref: '#/components/schemas/DefaultRAGQueryGeneratorConfig' - - $ref: '#/components/schemas/LLMRAGQueryGeneratorConfig' - discriminator: - propertyName: type - mapping: - default: '#/components/schemas/DefaultRAGQueryGeneratorConfig' - llm: '#/components/schemas/LLMRAGQueryGeneratorConfig' - description: Configuration for the query generator. - max_tokens_in_context: - type: integer - default: 4096 - description: Maximum number of tokens in the context. - max_chunks: - type: integer - default: 5 - description: Maximum number of chunks to retrieve. - chunk_template: - type: string - default: > - Result {index} - - Content: {chunk.content} - - Metadata: {metadata} - description: >- - Template for formatting each retrieved chunk in the context. Available - placeholders: {index} (1-based chunk ordinal), {chunk.content} (chunk - content string), {metadata} (chunk metadata dict). Default: "Result {index}\nContent: - {chunk.content}\nMetadata: {metadata}\n" - mode: - $ref: '#/components/schemas/RAGSearchMode' - default: vector - description: >- - Search mode for retrieval—either "vector", "keyword", or "hybrid". Default - "vector". - ranker: - $ref: '#/components/schemas/Ranker' - description: >- - Configuration for the ranker to use in hybrid search. Defaults to RRF - ranker. - additionalProperties: false - required: - - query_generator_config - - max_tokens_in_context - - max_chunks - - chunk_template - title: RAGQueryConfig - description: >- - Configuration for the RAG query generation. 
- RAGSearchMode: - type: string - enum: - - vector - - keyword - - hybrid - title: RAGSearchMode - description: >- - Search modes for RAG query retrieval: - VECTOR: Uses vector similarity search - for semantic matching - KEYWORD: Uses keyword-based search for exact matching - - HYBRID: Combines both vector and keyword search for better results - RRFRanker: - type: object - properties: - type: - type: string - const: rrf - default: rrf - description: The type of ranker, always "rrf" - impact_factor: - type: number - default: 60.0 - description: >- - The impact factor for RRF scoring. Higher values give more weight to higher-ranked - results. Must be greater than 0 - additionalProperties: false - required: - - type - - impact_factor - title: RRFRanker - description: >- - Reciprocal Rank Fusion (RRF) ranker configuration. - Ranker: - oneOf: - - $ref: '#/components/schemas/RRFRanker' - - $ref: '#/components/schemas/WeightedRanker' - discriminator: - propertyName: type - mapping: - rrf: '#/components/schemas/RRFRanker' - weighted: '#/components/schemas/WeightedRanker' - WeightedRanker: - type: object - properties: - type: - type: string - const: weighted - default: weighted - description: The type of ranker, always "weighted" - alpha: - type: number - default: 0.5 - description: >- - Weight factor between 0 and 1. 0 means only use keyword scores, 1 means - only use vector scores, values in between blend both scores. - additionalProperties: false - required: - - type - - alpha - title: WeightedRanker - description: >- - Weighted ranker configuration that combines vector and keyword scores. - QueryRequest: - type: object - properties: - content: - $ref: '#/components/schemas/InterleavedContent' - description: >- - The query content to search for in the indexed documents - vector_db_ids: - type: array - items: - type: string - description: >- - List of vector database IDs to search within - query_config: - $ref: '#/components/schemas/RAGQueryConfig' - description: >- - (Optional) Configuration parameters for the query operation - additionalProperties: false - required: - - content - - vector_db_ids - title: QueryRequest - RAGQueryResult: - type: object - properties: - content: - $ref: '#/components/schemas/InterleavedContent' - description: >- - (Optional) The retrieved content from the query - metadata: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - Additional metadata about the query result - additionalProperties: false - required: - - metadata - title: RAGQueryResult - description: >- - Result of a RAG query containing retrieved content and metadata. 
ToolGroup: type: object properties: @@ -8990,7 +8659,7 @@ components: enum: - model - shield - - vector_db + - vector_store - dataset - scoring_function - benchmark diff --git a/docs/static/stainless-llama-stack-spec.html b/docs/static/stainless-llama-stack-spec.html index ee0a265d3..7930b28e6 100644 --- a/docs/static/stainless-llama-stack-spec.html +++ b/docs/static/stainless-llama-stack-spec.html @@ -2624,89 +2624,6 @@ "deprecated": false } }, - "/v1/tool-runtime/rag-tool/insert": { - "post": { - "responses": { - "200": { - "description": "OK" - }, - "400": { - "$ref": "#/components/responses/BadRequest400" - }, - "429": { - "$ref": "#/components/responses/TooManyRequests429" - }, - "500": { - "$ref": "#/components/responses/InternalServerError500" - }, - "default": { - "$ref": "#/components/responses/DefaultError" - } - }, - "tags": [ - "ToolRuntime" - ], - "summary": "Index documents so they can be used by the RAG system.", - "description": "Index documents so they can be used by the RAG system.", - "parameters": [], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/InsertRequest" - } - } - }, - "required": true - }, - "deprecated": false - } - }, - "/v1/tool-runtime/rag-tool/query": { - "post": { - "responses": { - "200": { - "description": "RAGQueryResult containing the retrieved content and metadata", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/RAGQueryResult" - } - } - } - }, - "400": { - "$ref": "#/components/responses/BadRequest400" - }, - "429": { - "$ref": "#/components/responses/TooManyRequests429" - }, - "500": { - "$ref": "#/components/responses/InternalServerError500" - }, - "default": { - "$ref": "#/components/responses/DefaultError" - } - }, - "tags": [ - "ToolRuntime" - ], - "summary": "Query the RAG system for context; typically invoked by the agent.", - "description": "Query the RAG system for context; typically invoked by the agent.", - "parameters": [], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/QueryRequest" - } - } - }, - "required": true - }, - "deprecated": false - } - }, "/v1/toolgroups": { "get": { "responses": { @@ -8472,7 +8389,7 @@ "enum": [ "model", "shield", - "vector_db", + "vector_store", "dataset", "scoring_function", "benchmark", @@ -11877,7 +11794,7 @@ "enum": [ "model", "shield", - "vector_db", + "vector_store", "dataset", "scoring_function", "benchmark", @@ -12359,7 +12276,7 @@ "enum": [ "model", "shield", - "vector_db", + "vector_store", "dataset", "scoring_function", "benchmark", @@ -13055,346 +12972,6 @@ "title": "ListToolDefsResponse", "description": "Response containing a list of tool definitions." }, - "RAGDocument": { - "type": "object", - "properties": { - "document_id": { - "type": "string", - "description": "The unique identifier for the document." - }, - "content": { - "oneOf": [ - { - "type": "string" - }, - { - "$ref": "#/components/schemas/InterleavedContentItem" - }, - { - "type": "array", - "items": { - "$ref": "#/components/schemas/InterleavedContentItem" - } - }, - { - "$ref": "#/components/schemas/URL" - } - ], - "description": "The content of the document." - }, - "mime_type": { - "type": "string", - "description": "The MIME type of the document." 
- }, - "metadata": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - }, - "description": "Additional metadata for the document." - } - }, - "additionalProperties": false, - "required": [ - "document_id", - "content", - "metadata" - ], - "title": "RAGDocument", - "description": "A document to be used for document ingestion in the RAG Tool." - }, - "InsertRequest": { - "type": "object", - "properties": { - "documents": { - "type": "array", - "items": { - "$ref": "#/components/schemas/RAGDocument" - }, - "description": "List of documents to index in the RAG system" - }, - "vector_db_id": { - "type": "string", - "description": "ID of the vector database to store the document embeddings" - }, - "chunk_size_in_tokens": { - "type": "integer", - "description": "(Optional) Size in tokens for document chunking during indexing" - } - }, - "additionalProperties": false, - "required": [ - "documents", - "vector_db_id", - "chunk_size_in_tokens" - ], - "title": "InsertRequest" - }, - "DefaultRAGQueryGeneratorConfig": { - "type": "object", - "properties": { - "type": { - "type": "string", - "const": "default", - "default": "default", - "description": "Type of query generator, always 'default'" - }, - "separator": { - "type": "string", - "default": " ", - "description": "String separator used to join query terms" - } - }, - "additionalProperties": false, - "required": [ - "type", - "separator" - ], - "title": "DefaultRAGQueryGeneratorConfig", - "description": "Configuration for the default RAG query generator." - }, - "LLMRAGQueryGeneratorConfig": { - "type": "object", - "properties": { - "type": { - "type": "string", - "const": "llm", - "default": "llm", - "description": "Type of query generator, always 'llm'" - }, - "model": { - "type": "string", - "description": "Name of the language model to use for query generation" - }, - "template": { - "type": "string", - "description": "Template string for formatting the query generation prompt" - } - }, - "additionalProperties": false, - "required": [ - "type", - "model", - "template" - ], - "title": "LLMRAGQueryGeneratorConfig", - "description": "Configuration for the LLM-based RAG query generator." - }, - "RAGQueryConfig": { - "type": "object", - "properties": { - "query_generator_config": { - "oneOf": [ - { - "$ref": "#/components/schemas/DefaultRAGQueryGeneratorConfig" - }, - { - "$ref": "#/components/schemas/LLMRAGQueryGeneratorConfig" - } - ], - "discriminator": { - "propertyName": "type", - "mapping": { - "default": "#/components/schemas/DefaultRAGQueryGeneratorConfig", - "llm": "#/components/schemas/LLMRAGQueryGeneratorConfig" - } - }, - "description": "Configuration for the query generator." - }, - "max_tokens_in_context": { - "type": "integer", - "default": 4096, - "description": "Maximum number of tokens in the context." - }, - "max_chunks": { - "type": "integer", - "default": 5, - "description": "Maximum number of chunks to retrieve." - }, - "chunk_template": { - "type": "string", - "default": "Result {index}\nContent: {chunk.content}\nMetadata: {metadata}\n", - "description": "Template for formatting each retrieved chunk in the context. Available placeholders: {index} (1-based chunk ordinal), {chunk.content} (chunk content string), {metadata} (chunk metadata dict). 
Default: \"Result {index}\\nContent: {chunk.content}\\nMetadata: {metadata}\\n\"" - }, - "mode": { - "$ref": "#/components/schemas/RAGSearchMode", - "default": "vector", - "description": "Search mode for retrieval—either \"vector\", \"keyword\", or \"hybrid\". Default \"vector\"." - }, - "ranker": { - "$ref": "#/components/schemas/Ranker", - "description": "Configuration for the ranker to use in hybrid search. Defaults to RRF ranker." - } - }, - "additionalProperties": false, - "required": [ - "query_generator_config", - "max_tokens_in_context", - "max_chunks", - "chunk_template" - ], - "title": "RAGQueryConfig", - "description": "Configuration for the RAG query generation." - }, - "RAGSearchMode": { - "type": "string", - "enum": [ - "vector", - "keyword", - "hybrid" - ], - "title": "RAGSearchMode", - "description": "Search modes for RAG query retrieval: - VECTOR: Uses vector similarity search for semantic matching - KEYWORD: Uses keyword-based search for exact matching - HYBRID: Combines both vector and keyword search for better results" - }, - "RRFRanker": { - "type": "object", - "properties": { - "type": { - "type": "string", - "const": "rrf", - "default": "rrf", - "description": "The type of ranker, always \"rrf\"" - }, - "impact_factor": { - "type": "number", - "default": 60.0, - "description": "The impact factor for RRF scoring. Higher values give more weight to higher-ranked results. Must be greater than 0" - } - }, - "additionalProperties": false, - "required": [ - "type", - "impact_factor" - ], - "title": "RRFRanker", - "description": "Reciprocal Rank Fusion (RRF) ranker configuration." - }, - "Ranker": { - "oneOf": [ - { - "$ref": "#/components/schemas/RRFRanker" - }, - { - "$ref": "#/components/schemas/WeightedRanker" - } - ], - "discriminator": { - "propertyName": "type", - "mapping": { - "rrf": "#/components/schemas/RRFRanker", - "weighted": "#/components/schemas/WeightedRanker" - } - } - }, - "WeightedRanker": { - "type": "object", - "properties": { - "type": { - "type": "string", - "const": "weighted", - "default": "weighted", - "description": "The type of ranker, always \"weighted\"" - }, - "alpha": { - "type": "number", - "default": 0.5, - "description": "Weight factor between 0 and 1. 0 means only use keyword scores, 1 means only use vector scores, values in between blend both scores." - } - }, - "additionalProperties": false, - "required": [ - "type", - "alpha" - ], - "title": "WeightedRanker", - "description": "Weighted ranker configuration that combines vector and keyword scores." 
- }, - "QueryRequest": { - "type": "object", - "properties": { - "content": { - "$ref": "#/components/schemas/InterleavedContent", - "description": "The query content to search for in the indexed documents" - }, - "vector_db_ids": { - "type": "array", - "items": { - "type": "string" - }, - "description": "List of vector database IDs to search within" - }, - "query_config": { - "$ref": "#/components/schemas/RAGQueryConfig", - "description": "(Optional) Configuration parameters for the query operation" - } - }, - "additionalProperties": false, - "required": [ - "content", - "vector_db_ids" - ], - "title": "QueryRequest" - }, - "RAGQueryResult": { - "type": "object", - "properties": { - "content": { - "$ref": "#/components/schemas/InterleavedContent", - "description": "(Optional) The retrieved content from the query" - }, - "metadata": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - }, - "description": "Additional metadata about the query result" - } - }, - "additionalProperties": false, - "required": [ - "metadata" - ], - "title": "RAGQueryResult", - "description": "Result of a RAG query containing retrieved content and metadata." - }, "ToolGroup": { "type": "object", "properties": { @@ -13412,7 +12989,7 @@ "enum": [ "model", "shield", - "vector_db", + "vector_store", "dataset", "scoring_function", "benchmark", @@ -14959,7 +14536,7 @@ "enum": [ "model", "shield", - "vector_db", + "vector_store", "dataset", "scoring_function", "benchmark", @@ -16704,7 +16281,7 @@ "enum": [ "model", "shield", - "vector_db", + "vector_store", "dataset", "scoring_function", "benchmark", diff --git a/docs/static/stainless-llama-stack-spec.yaml b/docs/static/stainless-llama-stack-spec.yaml index eff01931f..98a309f12 100644 --- a/docs/static/stainless-llama-stack-spec.yaml +++ b/docs/static/stainless-llama-stack-spec.yaml @@ -2039,69 +2039,6 @@ paths: schema: $ref: '#/components/schemas/URL' deprecated: false - /v1/tool-runtime/rag-tool/insert: - post: - responses: - '200': - description: OK - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - ToolRuntime - summary: >- - Index documents so they can be used by the RAG system. - description: >- - Index documents so they can be used by the RAG system. - parameters: [] - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/InsertRequest' - required: true - deprecated: false - /v1/tool-runtime/rag-tool/query: - post: - responses: - '200': - description: >- - RAGQueryResult containing the retrieved content and metadata - content: - application/json: - schema: - $ref: '#/components/schemas/RAGQueryResult' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - ToolRuntime - summary: >- - Query the RAG system for context; typically invoked by the agent. - description: >- - Query the RAG system for context; typically invoked by the agent. 
- parameters: [] - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/QueryRequest' - required: true - deprecated: false /v1/toolgroups: get: responses: @@ -6440,7 +6377,7 @@ components: enum: - model - shield - - vector_db + - vector_store - dataset - scoring_function - benchmark @@ -9132,7 +9069,7 @@ components: enum: - model - shield - - vector_db + - vector_store - dataset - scoring_function - benchmark @@ -9440,7 +9377,7 @@ components: enum: - model - shield - - vector_db + - vector_store - dataset - scoring_function - benchmark @@ -9921,274 +9858,6 @@ components: title: ListToolDefsResponse description: >- Response containing a list of tool definitions. - RAGDocument: - type: object - properties: - document_id: - type: string - description: The unique identifier for the document. - content: - oneOf: - - type: string - - $ref: '#/components/schemas/InterleavedContentItem' - - type: array - items: - $ref: '#/components/schemas/InterleavedContentItem' - - $ref: '#/components/schemas/URL' - description: The content of the document. - mime_type: - type: string - description: The MIME type of the document. - metadata: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: Additional metadata for the document. - additionalProperties: false - required: - - document_id - - content - - metadata - title: RAGDocument - description: >- - A document to be used for document ingestion in the RAG Tool. - InsertRequest: - type: object - properties: - documents: - type: array - items: - $ref: '#/components/schemas/RAGDocument' - description: >- - List of documents to index in the RAG system - vector_db_id: - type: string - description: >- - ID of the vector database to store the document embeddings - chunk_size_in_tokens: - type: integer - description: >- - (Optional) Size in tokens for document chunking during indexing - additionalProperties: false - required: - - documents - - vector_db_id - - chunk_size_in_tokens - title: InsertRequest - DefaultRAGQueryGeneratorConfig: - type: object - properties: - type: - type: string - const: default - default: default - description: >- - Type of query generator, always 'default' - separator: - type: string - default: ' ' - description: >- - String separator used to join query terms - additionalProperties: false - required: - - type - - separator - title: DefaultRAGQueryGeneratorConfig - description: >- - Configuration for the default RAG query generator. - LLMRAGQueryGeneratorConfig: - type: object - properties: - type: - type: string - const: llm - default: llm - description: Type of query generator, always 'llm' - model: - type: string - description: >- - Name of the language model to use for query generation - template: - type: string - description: >- - Template string for formatting the query generation prompt - additionalProperties: false - required: - - type - - model - - template - title: LLMRAGQueryGeneratorConfig - description: >- - Configuration for the LLM-based RAG query generator. - RAGQueryConfig: - type: object - properties: - query_generator_config: - oneOf: - - $ref: '#/components/schemas/DefaultRAGQueryGeneratorConfig' - - $ref: '#/components/schemas/LLMRAGQueryGeneratorConfig' - discriminator: - propertyName: type - mapping: - default: '#/components/schemas/DefaultRAGQueryGeneratorConfig' - llm: '#/components/schemas/LLMRAGQueryGeneratorConfig' - description: Configuration for the query generator. 
- max_tokens_in_context: - type: integer - default: 4096 - description: Maximum number of tokens in the context. - max_chunks: - type: integer - default: 5 - description: Maximum number of chunks to retrieve. - chunk_template: - type: string - default: > - Result {index} - - Content: {chunk.content} - - Metadata: {metadata} - description: >- - Template for formatting each retrieved chunk in the context. Available - placeholders: {index} (1-based chunk ordinal), {chunk.content} (chunk - content string), {metadata} (chunk metadata dict). Default: "Result {index}\nContent: - {chunk.content}\nMetadata: {metadata}\n" - mode: - $ref: '#/components/schemas/RAGSearchMode' - default: vector - description: >- - Search mode for retrieval—either "vector", "keyword", or "hybrid". Default - "vector". - ranker: - $ref: '#/components/schemas/Ranker' - description: >- - Configuration for the ranker to use in hybrid search. Defaults to RRF - ranker. - additionalProperties: false - required: - - query_generator_config - - max_tokens_in_context - - max_chunks - - chunk_template - title: RAGQueryConfig - description: >- - Configuration for the RAG query generation. - RAGSearchMode: - type: string - enum: - - vector - - keyword - - hybrid - title: RAGSearchMode - description: >- - Search modes for RAG query retrieval: - VECTOR: Uses vector similarity search - for semantic matching - KEYWORD: Uses keyword-based search for exact matching - - HYBRID: Combines both vector and keyword search for better results - RRFRanker: - type: object - properties: - type: - type: string - const: rrf - default: rrf - description: The type of ranker, always "rrf" - impact_factor: - type: number - default: 60.0 - description: >- - The impact factor for RRF scoring. Higher values give more weight to higher-ranked - results. Must be greater than 0 - additionalProperties: false - required: - - type - - impact_factor - title: RRFRanker - description: >- - Reciprocal Rank Fusion (RRF) ranker configuration. - Ranker: - oneOf: - - $ref: '#/components/schemas/RRFRanker' - - $ref: '#/components/schemas/WeightedRanker' - discriminator: - propertyName: type - mapping: - rrf: '#/components/schemas/RRFRanker' - weighted: '#/components/schemas/WeightedRanker' - WeightedRanker: - type: object - properties: - type: - type: string - const: weighted - default: weighted - description: The type of ranker, always "weighted" - alpha: - type: number - default: 0.5 - description: >- - Weight factor between 0 and 1. 0 means only use keyword scores, 1 means - only use vector scores, values in between blend both scores. - additionalProperties: false - required: - - type - - alpha - title: WeightedRanker - description: >- - Weighted ranker configuration that combines vector and keyword scores. 
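As an editorial aside, the two ranker schemas above reduce to simple arithmetic. The sketch below is not part of the spec or this patch; the helper names are hypothetical. It shows the standard Reciprocal Rank Fusion formula 1/(k + rank), reading impact_factor as the constant k, and the weighted ranker's convex blend of the two scores as described above.

# Hypothetical helpers illustrating the removed RRFRanker / WeightedRanker schemas.
def rrf_score(ranks: list[int], impact_factor: float = 60.0) -> float:
    # Standard RRF: each ranked list contributes 1 / (impact_factor + rank).
    return sum(1.0 / (impact_factor + rank) for rank in ranks)

def weighted_score(vector_score: float, keyword_score: float, alpha: float = 0.5) -> float:
    # Per the schema: alpha=0 keeps only the keyword score, alpha=1 only the
    # vector score; values in between blend the two.
    return alpha * vector_score + (1.0 - alpha) * keyword_score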
- QueryRequest: - type: object - properties: - content: - $ref: '#/components/schemas/InterleavedContent' - description: >- - The query content to search for in the indexed documents - vector_db_ids: - type: array - items: - type: string - description: >- - List of vector database IDs to search within - query_config: - $ref: '#/components/schemas/RAGQueryConfig' - description: >- - (Optional) Configuration parameters for the query operation - additionalProperties: false - required: - - content - - vector_db_ids - title: QueryRequest - RAGQueryResult: - type: object - properties: - content: - $ref: '#/components/schemas/InterleavedContent' - description: >- - (Optional) The retrieved content from the query - metadata: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - Additional metadata about the query result - additionalProperties: false - required: - - metadata - title: RAGQueryResult - description: >- - Result of a RAG query containing retrieved content and metadata. ToolGroup: type: object properties: @@ -10203,7 +9872,7 @@ components: enum: - model - shield - - vector_db + - vector_store - dataset - scoring_function - benchmark @@ -11325,7 +10994,7 @@ components: enum: - model - shield - - vector_db + - vector_store - dataset - scoring_function - benchmark @@ -12652,7 +12321,7 @@ components: enum: - model - shield - - vector_db + - vector_store - dataset - scoring_function - benchmark diff --git a/llama_stack/apis/datatypes.py b/llama_stack/apis/datatypes.py index 5777f3d04..948ec615f 100644 --- a/llama_stack/apis/datatypes.py +++ b/llama_stack/apis/datatypes.py @@ -121,7 +121,7 @@ class Api(Enum, metaclass=DynamicApiMeta): models = "models" shields = "shields" - vector_dbs = "vector_dbs" # only used for routing + vector_stores = "vector_stores" # only used for routing table datasets = "datasets" scoring_functions = "scoring_functions" benchmarks = "benchmarks" diff --git a/llama_stack/apis/resource.py b/llama_stack/apis/resource.py index 7c4130f7d..dafdb28b0 100644 --- a/llama_stack/apis/resource.py +++ b/llama_stack/apis/resource.py @@ -13,7 +13,7 @@ from pydantic import BaseModel, Field class ResourceType(StrEnum): model = "model" shield = "shield" - vector_db = "vector_db" + vector_store = "vector_store" dataset = "dataset" scoring_function = "scoring_function" benchmark = "benchmark" @@ -34,4 +34,4 @@ class Resource(BaseModel): provider_id: str = Field(description="ID of the provider that owns this resource") - type: ResourceType = Field(description="Type of resource (e.g. 'model', 'shield', 'vector_db', etc.)") + type: ResourceType = Field(description="Type of resource (e.g. 'model', 'shield', 'vector_store', etc.)") diff --git a/llama_stack/apis/tools/__init__.py b/llama_stack/apis/tools/__init__.py index b25310ecf..2908d1c62 100644 --- a/llama_stack/apis/tools/__init__.py +++ b/llama_stack/apis/tools/__init__.py @@ -4,5 +4,4 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from .rag_tool import * from .tools import * diff --git a/llama_stack/apis/tools/rag_tool.py b/llama_stack/apis/tools/rag_tool.py deleted file mode 100644 index ed7847e23..000000000 --- a/llama_stack/apis/tools/rag_tool.py +++ /dev/null @@ -1,218 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. 
-# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from enum import Enum, StrEnum -from typing import Annotated, Any, Literal, Protocol - -from pydantic import BaseModel, Field, field_validator -from typing_extensions import runtime_checkable - -from llama_stack.apis.common.content_types import URL, InterleavedContent -from llama_stack.apis.version import LLAMA_STACK_API_V1 -from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol -from llama_stack.schema_utils import json_schema_type, register_schema, webmethod - - -@json_schema_type -class RRFRanker(BaseModel): - """ - Reciprocal Rank Fusion (RRF) ranker configuration. - - :param type: The type of ranker, always "rrf" - :param impact_factor: The impact factor for RRF scoring. Higher values give more weight to higher-ranked results. - Must be greater than 0 - """ - - type: Literal["rrf"] = "rrf" - impact_factor: float = Field(default=60.0, gt=0.0) # default of 60 for optimal performance - - -@json_schema_type -class WeightedRanker(BaseModel): - """ - Weighted ranker configuration that combines vector and keyword scores. - - :param type: The type of ranker, always "weighted" - :param alpha: Weight factor between 0 and 1. - 0 means only use keyword scores, - 1 means only use vector scores, - values in between blend both scores. - """ - - type: Literal["weighted"] = "weighted" - alpha: float = Field( - default=0.5, - ge=0.0, - le=1.0, - description="Weight factor between 0 and 1. 0 means only keyword scores, 1 means only vector scores.", - ) - - -Ranker = Annotated[ - RRFRanker | WeightedRanker, - Field(discriminator="type"), -] -register_schema(Ranker, name="Ranker") - - -@json_schema_type -class RAGDocument(BaseModel): - """ - A document to be used for document ingestion in the RAG Tool. - - :param document_id: The unique identifier for the document. - :param content: The content of the document. - :param mime_type: The MIME type of the document. - :param metadata: Additional metadata for the document. - """ - - document_id: str - content: InterleavedContent | URL - mime_type: str | None = None - metadata: dict[str, Any] = Field(default_factory=dict) - - -@json_schema_type -class RAGQueryResult(BaseModel): - """Result of a RAG query containing retrieved content and metadata. - - :param content: (Optional) The retrieved content from the query - :param metadata: Additional metadata about the query result - """ - - content: InterleavedContent | None = None - metadata: dict[str, Any] = Field(default_factory=dict) - - -@json_schema_type -class RAGQueryGenerator(Enum): - """Types of query generators for RAG systems. - - :cvar default: Default query generator using simple text processing - :cvar llm: LLM-based query generator for enhanced query understanding - :cvar custom: Custom query generator implementation - """ - - default = "default" - llm = "llm" - custom = "custom" - - -@json_schema_type -class RAGSearchMode(StrEnum): - """ - Search modes for RAG query retrieval: - - VECTOR: Uses vector similarity search for semantic matching - - KEYWORD: Uses keyword-based search for exact matching - - HYBRID: Combines both vector and keyword search for better results - """ - - VECTOR = "vector" - KEYWORD = "keyword" - HYBRID = "hybrid" - - -@json_schema_type -class DefaultRAGQueryGeneratorConfig(BaseModel): - """Configuration for the default RAG query generator. 
- - :param type: Type of query generator, always 'default' - :param separator: String separator used to join query terms - """ - - type: Literal["default"] = "default" - separator: str = " " - - -@json_schema_type -class LLMRAGQueryGeneratorConfig(BaseModel): - """Configuration for the LLM-based RAG query generator. - - :param type: Type of query generator, always 'llm' - :param model: Name of the language model to use for query generation - :param template: Template string for formatting the query generation prompt - """ - - type: Literal["llm"] = "llm" - model: str - template: str - - -RAGQueryGeneratorConfig = Annotated[ - DefaultRAGQueryGeneratorConfig | LLMRAGQueryGeneratorConfig, - Field(discriminator="type"), -] -register_schema(RAGQueryGeneratorConfig, name="RAGQueryGeneratorConfig") - - -@json_schema_type -class RAGQueryConfig(BaseModel): - """ - Configuration for the RAG query generation. - - :param query_generator_config: Configuration for the query generator. - :param max_tokens_in_context: Maximum number of tokens in the context. - :param max_chunks: Maximum number of chunks to retrieve. - :param chunk_template: Template for formatting each retrieved chunk in the context. - Available placeholders: {index} (1-based chunk ordinal), {chunk.content} (chunk content string), {metadata} (chunk metadata dict). - Default: "Result {index}\\nContent: {chunk.content}\\nMetadata: {metadata}\\n" - :param mode: Search mode for retrieval—either "vector", "keyword", or "hybrid". Default "vector". - :param ranker: Configuration for the ranker to use in hybrid search. Defaults to RRF ranker. - """ - - # This config defines how a query is generated using the messages - # for memory bank retrieval. - query_generator_config: RAGQueryGeneratorConfig = Field(default=DefaultRAGQueryGeneratorConfig()) - max_tokens_in_context: int = 4096 - max_chunks: int = 5 - chunk_template: str = "Result {index}\nContent: {chunk.content}\nMetadata: {metadata}\n" - mode: RAGSearchMode | None = RAGSearchMode.VECTOR - ranker: Ranker | None = Field(default=None) # Only used for hybrid mode - - @field_validator("chunk_template") - def validate_chunk_template(cls, v: str) -> str: - if "{chunk.content}" not in v: - raise ValueError("chunk_template must contain {chunk.content}") - if "{index}" not in v: - raise ValueError("chunk_template must contain {index}") - if len(v) == 0: - raise ValueError("chunk_template must not be empty") - return v - - -@runtime_checkable -@trace_protocol -class RAGToolRuntime(Protocol): - @webmethod(route="/tool-runtime/rag-tool/insert", method="POST", level=LLAMA_STACK_API_V1) - async def insert( - self, - documents: list[RAGDocument], - vector_db_id: str, - chunk_size_in_tokens: int = 512, - ) -> None: - """Index documents so they can be used by the RAG system. - - :param documents: List of documents to index in the RAG system - :param vector_db_id: ID of the vector database to store the document embeddings - :param chunk_size_in_tokens: (Optional) Size in tokens for document chunking during indexing - """ - ... - - @webmethod(route="/tool-runtime/rag-tool/query", method="POST", level=LLAMA_STACK_API_V1) - async def query( - self, - content: InterleavedContent, - vector_db_ids: list[str], - query_config: RAGQueryConfig | None = None, - ) -> RAGQueryResult: - """Query the RAG system for context; typically invoked by the agent. 
- - :param content: The query content to search for in the indexed documents - :param vector_db_ids: List of vector database IDs to search within - :param query_config: (Optional) Configuration parameters for the query operation - :returns: RAGQueryResult containing the retrieved content and metadata - """ - ... diff --git a/llama_stack/apis/tools/tools.py b/llama_stack/apis/tools/tools.py index b6a1a2543..feac0d33e 100644 --- a/llama_stack/apis/tools/tools.py +++ b/llama_stack/apis/tools/tools.py @@ -4,7 +4,6 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from enum import Enum from typing import Any, Literal, Protocol from pydantic import BaseModel @@ -16,8 +15,6 @@ from llama_stack.apis.version import LLAMA_STACK_API_V1 from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol from llama_stack.schema_utils import json_schema_type, webmethod -from .rag_tool import RAGToolRuntime - @json_schema_type class ToolDef(BaseModel): @@ -181,22 +178,11 @@ class ToolGroups(Protocol): ... -class SpecialToolGroup(Enum): - """Special tool groups with predefined functionality. - - :cvar rag_tool: Retrieval-Augmented Generation tool group for document search and retrieval - """ - - rag_tool = "rag_tool" - - @runtime_checkable @trace_protocol class ToolRuntime(Protocol): tool_store: ToolStore | None = None - rag_tool: RAGToolRuntime | None = None - # TODO: This needs to be renamed once OPEN API generator name conflict issue is fixed. @webmethod(route="/tool-runtime/list-tools", method="GET", level=LLAMA_STACK_API_V1) async def list_runtime_tools( diff --git a/llama_stack/apis/vector_dbs/vector_dbs.py b/llama_stack/apis/vector_dbs/vector_dbs.py deleted file mode 100644 index 0368095cb..000000000 --- a/llama_stack/apis/vector_dbs/vector_dbs.py +++ /dev/null @@ -1,93 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from typing import Literal, Protocol, runtime_checkable - -from pydantic import BaseModel - -from llama_stack.apis.resource import Resource, ResourceType -from llama_stack.schema_utils import json_schema_type - - -@json_schema_type -class VectorDB(Resource): - """Vector database resource for storing and querying vector embeddings. - - :param type: Type of resource, always 'vector_db' for vector databases - :param embedding_model: Name of the embedding model to use for vector generation - :param embedding_dimension: Dimension of the embedding vectors - """ - - type: Literal[ResourceType.vector_db] = ResourceType.vector_db - - embedding_model: str - embedding_dimension: int - vector_db_name: str | None = None - - @property - def vector_db_id(self) -> str: - return self.identifier - - @property - def provider_vector_db_id(self) -> str | None: - return self.provider_resource_id - - -class VectorDBInput(BaseModel): - """Input parameters for creating or configuring a vector database. 
- - :param vector_db_id: Unique identifier for the vector database - :param embedding_model: Name of the embedding model to use for vector generation - :param embedding_dimension: Dimension of the embedding vectors - :param provider_vector_db_id: (Optional) Provider-specific identifier for the vector database - """ - - vector_db_id: str - embedding_model: str - embedding_dimension: int - provider_id: str | None = None - provider_vector_db_id: str | None = None - - -class ListVectorDBsResponse(BaseModel): - """Response from listing vector databases. - - :param data: List of vector databases - """ - - data: list[VectorDB] - - -@runtime_checkable -class VectorDBs(Protocol): - """Internal protocol for vector_dbs routing - no public API endpoints.""" - - async def list_vector_dbs(self) -> ListVectorDBsResponse: - """Internal method to list vector databases.""" - ... - - async def get_vector_db( - self, - vector_db_id: str, - ) -> VectorDB: - """Internal method to get a vector database by ID.""" - ... - - async def register_vector_db( - self, - vector_db_id: str, - embedding_model: str, - embedding_dimension: int | None = 384, - provider_id: str | None = None, - vector_db_name: str | None = None, - provider_vector_db_id: str | None = None, - ) -> VectorDB: - """Internal method to register a vector database.""" - ... - - async def unregister_vector_db(self, vector_db_id: str) -> None: - """Internal method to unregister a vector database.""" - ... diff --git a/llama_stack/apis/vector_io/vector_io.py b/llama_stack/apis/vector_io/vector_io.py index a309c47f9..49e4df039 100644 --- a/llama_stack/apis/vector_io/vector_io.py +++ b/llama_stack/apis/vector_io/vector_io.py @@ -15,7 +15,7 @@ from fastapi import Body from pydantic import BaseModel, Field from llama_stack.apis.inference import InterleavedContent -from llama_stack.apis.vector_dbs import VectorDB +from llama_stack.apis.vector_stores import VectorStore from llama_stack.apis.version import LLAMA_STACK_API_V1 from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id @@ -140,6 +140,7 @@ class VectorStoreFileCounts(BaseModel): total: int +# TODO: rename this as OpenAIVectorStore @json_schema_type class VectorStoreObject(BaseModel): """OpenAI Vector Store object. @@ -517,17 +518,18 @@ class OpenAICreateVectorStoreFileBatchRequestWithExtraBody(BaseModel, extra="all chunking_strategy: VectorStoreChunkingStrategy | None = None -class VectorDBStore(Protocol): - def get_vector_db(self, vector_db_id: str) -> VectorDB | None: ... +class VectorStoreTable(Protocol): + def get_vector_store(self, vector_store_id: str) -> VectorStore | None: ... @runtime_checkable @trace_protocol class VectorIO(Protocol): - vector_db_store: VectorDBStore | None = None + vector_store_table: VectorStoreTable | None = None # this will just block now until chunks are inserted, but it should # probably return a Job instance which can be polled for completion + # TODO: rename vector_db_id to vector_store_id once Stainless is working @webmethod(route="/vector-io/insert", method="POST", level=LLAMA_STACK_API_V1) async def insert_chunks( self, @@ -546,6 +548,7 @@ class VectorIO(Protocol): """ ... 
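The VectorStoreTable protocol introduced above is the router-facing lookup that replaces VectorDBStore. As a hedged sketch, not part of this patch and assuming only the VectorStore model added under llama_stack/apis/vector_stores, a minimal conforming implementation could look like:

# Hypothetical in-memory implementation of the VectorStoreTable protocol.
from llama_stack.apis.vector_stores import VectorStore

class InMemoryVectorStoreTable:
    def __init__(self) -> None:
        self._stores: dict[str, VectorStore] = {}

    def register(self, store: VectorStore) -> None:
        self._stores[store.vector_store_id] = store

    def get_vector_store(self, vector_store_id: str) -> VectorStore | None:
        # The protocol returns None for unknown ids, leaving miss handling to the caller.
        return self._stores.get(vector_store_id)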
+    # TODO: rename vector_db_id to vector_store_id once Stainless is working
     @webmethod(route="/vector-io/query", method="POST", level=LLAMA_STACK_API_V1)
     async def query_chunks(
         self,
diff --git a/llama_stack/apis/vector_dbs/__init__.py b/llama_stack/apis/vector_stores/__init__.py
similarity index 87%
rename from llama_stack/apis/vector_dbs/__init__.py
rename to llama_stack/apis/vector_stores/__init__.py
index af34ba9d4..8fc34058a 100644
--- a/llama_stack/apis/vector_dbs/__init__.py
+++ b/llama_stack/apis/vector_stores/__init__.py
@@ -4,4 +4,4 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

-from .vector_dbs import *
+from .vector_stores import *
diff --git a/llama_stack/apis/vector_stores/vector_stores.py b/llama_stack/apis/vector_stores/vector_stores.py
new file mode 100644
index 000000000..524624028
--- /dev/null
+++ b/llama_stack/apis/vector_stores/vector_stores.py
@@ -0,0 +1,51 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from typing import Literal
+
+from pydantic import BaseModel
+
+from llama_stack.apis.resource import Resource, ResourceType
+
+
+# Internal resource type for storing vector store routing and related information
+class VectorStore(Resource):
+    """Vector store resource for storing and querying vector embeddings.
+
+    :param type: Type of resource, always 'vector_store' for vector stores
+    :param embedding_model: Name of the embedding model to use for vector generation
+    :param embedding_dimension: Dimension of the embedding vectors
+    """
+
+    type: Literal[ResourceType.vector_store] = ResourceType.vector_store
+
+    embedding_model: str
+    embedding_dimension: int
+    vector_store_name: str | None = None
+
+    @property
+    def vector_store_id(self) -> str:
+        return self.identifier
+
+    @property
+    def provider_vector_store_id(self) -> str | None:
+        return self.provider_resource_id
+
+
+class VectorStoreInput(BaseModel):
+    """Input parameters for creating or configuring a vector store.
+
+    :param vector_store_id: Unique identifier for the vector store
+    :param embedding_model: Name of the embedding model to use for vector generation
+    :param embedding_dimension: Dimension of the embedding vectors
+    :param provider_vector_store_id: (Optional) Provider-specific identifier for the vector store
+    """
+
+    vector_store_id: str
+    embedding_model: str
+    embedding_dimension: int
+    provider_id: str | None = None
+    provider_vector_store_id: str | None = None
diff --git a/llama_stack/cli/stack/_build.py b/llama_stack/cli/stack/_build.py
deleted file mode 100644
index 2a30ff394..000000000
--- a/llama_stack/cli/stack/_build.py
+++ /dev/null
@@ -1,519 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
- -import argparse -import importlib.resources -import json -import os -import shutil -import sys -import textwrap -from functools import lru_cache -from importlib.abc import Traversable -from pathlib import Path - -import yaml -from prompt_toolkit import prompt -from prompt_toolkit.completion import WordCompleter -from prompt_toolkit.validation import Validator -from termcolor import colored, cprint - -from llama_stack.cli.stack.utils import ImageType -from llama_stack.cli.table import print_table -from llama_stack.core.build import ( - SERVER_DEPENDENCIES, - build_image, - get_provider_dependencies, -) -from llama_stack.core.configure import parse_and_maybe_upgrade_config -from llama_stack.core.datatypes import ( - BuildConfig, - BuildProvider, - DistributionSpec, - Provider, - StackRunConfig, -) -from llama_stack.core.distribution import get_provider_registry -from llama_stack.core.external import load_external_apis -from llama_stack.core.resolver import InvalidProviderError -from llama_stack.core.stack import replace_env_vars -from llama_stack.core.storage.datatypes import ( - InferenceStoreReference, - KVStoreReference, - ServerStoresConfig, - SqliteKVStoreConfig, - SqliteSqlStoreConfig, - SqlStoreReference, - StorageConfig, -) -from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR, EXTERNAL_PROVIDERS_DIR -from llama_stack.core.utils.dynamic import instantiate_class_type -from llama_stack.core.utils.exec import formulate_run_args, run_command -from llama_stack.core.utils.image_types import LlamaStackImageType -from llama_stack.providers.datatypes import Api - -DISTRIBS_PATH = Path(__file__).parent.parent.parent / "distributions" - - -@lru_cache -def available_distros_specs() -> dict[str, BuildConfig]: - import yaml - - distro_specs = {} - for p in DISTRIBS_PATH.rglob("*build.yaml"): - distro_name = p.parent.name - with open(p) as f: - build_config = BuildConfig(**yaml.safe_load(f)) - distro_specs[distro_name] = build_config - return distro_specs - - -def run_stack_build_command(args: argparse.Namespace) -> None: - if args.list_distros: - return _run_distro_list_cmd() - - if args.image_type == ImageType.VENV.value: - current_venv = os.environ.get("VIRTUAL_ENV") - image_name = args.image_name or current_venv - else: - image_name = args.image_name - - if args.template: - cprint( - "The --template argument is deprecated. Please use --distro instead.", - color="red", - file=sys.stderr, - ) - distro_name = args.template - else: - distro_name = args.distribution - - if distro_name: - available_distros = available_distros_specs() - if distro_name not in available_distros: - cprint( - f"Could not find distribution {distro_name}. Please run `llama stack build --list-distros` to check out the available distributions", - color="red", - file=sys.stderr, - ) - sys.exit(1) - build_config = available_distros[distro_name] - if args.image_type: - build_config.image_type = args.image_type - else: - cprint( - f"Please specify a image-type ({' | '.join(e.value for e in ImageType)}) for {distro_name}", - color="red", - file=sys.stderr, - ) - sys.exit(1) - elif args.providers: - provider_list: dict[str, list[BuildProvider]] = dict() - for api_provider in args.providers.split(","): - if "=" not in api_provider: - cprint( - "Could not parse `--providers`. 
Please ensure the list is in the format api1=provider1,api2=provider2",
-                    color="red",
-                    file=sys.stderr,
-                )
-                sys.exit(1)
-            api, provider_type = api_provider.split("=")
-            providers_for_api = get_provider_registry().get(Api(api), None)
-            if providers_for_api is None:
-                cprint(
-                    f"{api} is not a valid API.",
-                    color="red",
-                    file=sys.stderr,
-                )
-                sys.exit(1)
-            if provider_type in providers_for_api:
-                provider = BuildProvider(
-                    provider_type=provider_type,
-                    module=None,
-                )
-                provider_list.setdefault(api, []).append(provider)
-            else:
-                cprint(
-                    f"{provider} is not a valid provider for the {api} API.",
-                    color="red",
-                    file=sys.stderr,
-                )
-                sys.exit(1)
-        distribution_spec = DistributionSpec(
-            providers=provider_list,
-            description=",".join(args.providers),
-        )
-        if not args.image_type:
-            cprint(
-                f"Please specify a image-type (container | venv) for {args.template}",
-                color="red",
-                file=sys.stderr,
-            )
-            sys.exit(1)
-
-        build_config = BuildConfig(image_type=args.image_type, distribution_spec=distribution_spec)
-    elif not args.config and not distro_name:
-        name = prompt(
-            "> Enter a name for your Llama Stack (e.g. my-local-stack): ",
-            validator=Validator.from_callable(
-                lambda x: len(x) > 0,
-                error_message="Name cannot be empty, please enter a name",
-            ),
-        )
-
-        image_type = prompt(
-            "> Enter the image type you want your Llama Stack to be built as (use <TAB> to see options): ",
-            completer=WordCompleter([e.value for e in ImageType]),
-            complete_while_typing=True,
-            validator=Validator.from_callable(
-                lambda x: x in [e.value for e in ImageType],
-                error_message="Invalid image type. Use <TAB> to see options",
-            ),
-        )
-
-        image_name = f"llamastack-{name}"
-
-        cprint(
-            textwrap.dedent(
-                """
-                Llama Stack is composed of several APIs working together. Let's select
-                the provider types (implementations) you want to use for these APIs.
-                """,
-            ),
-            color="green",
-            file=sys.stderr,
-        )
-
-        cprint("Tip: use <TAB> to see options for the providers.\n", color="green", file=sys.stderr)
-
-        providers: dict[str, list[BuildProvider]] = dict()
-        for api, providers_for_api in get_provider_registry().items():
-            available_providers = [x for x in providers_for_api.keys() if x not in ("remote", "remote::sample")]
-            if not available_providers:
-                continue
-            api_provider = prompt(
-                f"> Enter provider for API {api.value}: ",
-                completer=WordCompleter(available_providers),
-                complete_while_typing=True,
-                validator=Validator.from_callable(
-                    lambda x: x in available_providers,  # noqa: B023 - see https://github.com/astral-sh/ruff/issues/7847
-                    error_message="Invalid provider, use <TAB> to see options",
-                ),
-            )
-
-            string_providers = api_provider.split(" ")
-
-            for provider in string_providers:
-                providers.setdefault(api.value, []).append(BuildProvider(provider_type=provider))
-
-        description = prompt(
-            "\n > (Optional) Enter a short description for your Llama Stack: ",
-            default="",
-        )
-
-        distribution_spec = DistributionSpec(
-            providers=providers,
-            description=description,
-        )
-
-        build_config = BuildConfig(image_type=image_type, distribution_spec=distribution_spec)
-    else:
-        with open(args.config) as f:
-            try:
-                contents = yaml.safe_load(f)
-                contents = replace_env_vars(contents)
-                build_config = BuildConfig(**contents)
-                if args.image_type:
-                    build_config.image_type = args.image_type
-            except Exception as e:
-                cprint(
-                    f"Could not parse config file {args.config}: {e}",
-                    color="red",
-                    file=sys.stderr,
-                )
-                sys.exit(1)
-
-    if args.print_deps_only:
-        print(f"# Dependencies for {distro_name or args.config or image_name}")
-        normal_deps, special_deps, external_provider_dependencies = get_provider_dependencies(build_config)
-        normal_deps += SERVER_DEPENDENCIES
-        print(f"uv pip install {' '.join(normal_deps)}")
-        for special_dep in special_deps:
-            print(f"uv pip install {special_dep}")
-        for external_dep in external_provider_dependencies:
-            print(f"uv pip install {external_dep}")
-        return
-
-    try:
-        run_config = _run_stack_build_command_from_build_config(
-            build_config,
-            image_name=image_name,
-            config_path=args.config,
-            distro_name=distro_name,
-        )
-
-    except (Exception, RuntimeError) as exc:
-        import traceback
-
-        cprint(
-            f"Error building stack: {exc}",
-            color="red",
-            file=sys.stderr,
-        )
-        cprint("Stack trace:", color="red", file=sys.stderr)
-        traceback.print_exc()
-        sys.exit(1)
-
-    if run_config is None:
-        cprint(
-            "Run config path is empty",
-            color="red",
-            file=sys.stderr,
-        )
-        sys.exit(1)
-
-    if args.run:
-        config_dict = yaml.safe_load(run_config.read_text())
-        config = parse_and_maybe_upgrade_config(config_dict)
-        if config.external_providers_dir and not config.external_providers_dir.exists():
-            config.external_providers_dir.mkdir(exist_ok=True)
-        run_args = formulate_run_args(args.image_type, image_name or config.image_name)
-        run_args.extend([str(os.getenv("LLAMA_STACK_PORT", 8321)), "--config", str(run_config)])
-        run_command(run_args)
-
-
-def _generate_run_config(
-    build_config: BuildConfig,
-    build_dir: Path,
-    image_name: str,
-) -> Path:
-    """
-    Generate a run.yaml template file for user to edit from a build.yaml file
-    """
-    apis = list(build_config.distribution_spec.providers.keys())
-    distro_dir = DISTRIBS_BASE_DIR / image_name
-    storage = StorageConfig(
-        backends={
-            "kv_default": SqliteKVStoreConfig(
-                db_path=f"${{env.SQLITE_STORE_DIR:={distro_dir}}}/kvstore.db",
-            ),
-            "sql_default": SqliteSqlStoreConfig(
db_path=f"${{env.SQLITE_STORE_DIR:={distro_dir}}}/sql_store.db", - ), - }, - stores=ServerStoresConfig( - metadata=KVStoreReference( - backend="kv_default", - namespace="registry", - ), - inference=InferenceStoreReference( - backend="sql_default", - table_name="inference_store", - ), - conversations=SqlStoreReference( - backend="sql_default", - table_name="openai_conversations", - ), - ), - ) - - run_config = StackRunConfig( - container_image=(image_name if build_config.image_type == LlamaStackImageType.CONTAINER.value else None), - image_name=image_name, - apis=apis, - providers={}, - storage=storage, - external_providers_dir=build_config.external_providers_dir - if build_config.external_providers_dir - else EXTERNAL_PROVIDERS_DIR, - ) - # build providers dict - provider_registry = get_provider_registry(build_config) - for api in apis: - run_config.providers[api] = [] - providers = build_config.distribution_spec.providers[api] - - for provider in providers: - pid = provider.provider_type.split("::")[-1] - - p = provider_registry[Api(api)][provider.provider_type] - if p.deprecation_error: - raise InvalidProviderError(p.deprecation_error) - - try: - config_type = instantiate_class_type(provider_registry[Api(api)][provider.provider_type].config_class) - except (ModuleNotFoundError, ValueError) as exc: - # HACK ALERT: - # This code executes after building is done, the import cannot work since the - # package is either available in the venv or container - not available on the host. - # TODO: use a "is_external" flag in ProviderSpec to check if the provider is - # external - cprint( - f"Failed to import provider {provider.provider_type} for API {api} - assuming it's external, skipping: {exc}", - color="yellow", - file=sys.stderr, - ) - # Set config_type to None to avoid UnboundLocalError - config_type = None - - if config_type is not None and hasattr(config_type, "sample_run_config"): - config = config_type.sample_run_config(__distro_dir__=f"~/.llama/distributions/{image_name}") - else: - config = {} - - p_spec = Provider( - provider_id=pid, - provider_type=provider.provider_type, - config=config, - module=provider.module, - ) - run_config.providers[api].append(p_spec) - - run_config_file = build_dir / f"{image_name}-run.yaml" - - with open(run_config_file, "w") as f: - to_write = json.loads(run_config.model_dump_json()) - f.write(yaml.dump(to_write, sort_keys=False)) - - # Only print this message for non-container builds since it will be displayed before the - # container is built - # For non-container builds, the run.yaml is generated at the very end of the build process so it - # makes sense to display this message - if build_config.image_type != LlamaStackImageType.CONTAINER.value: - cprint(f"You can now run your stack with `llama stack run {run_config_file}`", color="green", file=sys.stderr) - return run_config_file - - -def _run_stack_build_command_from_build_config( - build_config: BuildConfig, - image_name: str | None = None, - distro_name: str | None = None, - config_path: str | None = None, -) -> Path | Traversable: - image_name = image_name or build_config.image_name - if build_config.image_type == LlamaStackImageType.CONTAINER.value: - if distro_name: - image_name = f"distribution-{distro_name}" - else: - if not image_name: - raise ValueError("Please specify an image name when building a container image without a template") - else: - if not image_name and os.environ.get("UV_SYSTEM_PYTHON"): - image_name = "__system__" - if not image_name: - raise ValueError("Please specify an image 
name when building a venv image") - - # At this point, image_name should be guaranteed to be a string - if image_name is None: - raise ValueError("image_name should not be None after validation") - - if distro_name: - build_dir = DISTRIBS_BASE_DIR / distro_name - build_file_path = build_dir / f"{distro_name}-build.yaml" - else: - if image_name is None: - raise ValueError("image_name cannot be None") - build_dir = DISTRIBS_BASE_DIR / image_name - build_file_path = build_dir / f"{image_name}-build.yaml" - - os.makedirs(build_dir, exist_ok=True) - run_config_file = None - # Generate the run.yaml so it can be included in the container image with the proper entrypoint - # Only do this if we're building a container image and we're not using a template - if build_config.image_type == LlamaStackImageType.CONTAINER.value and not distro_name and config_path: - cprint("Generating run.yaml file", color="yellow", file=sys.stderr) - run_config_file = _generate_run_config(build_config, build_dir, image_name) - - with open(build_file_path, "w") as f: - to_write = json.loads(build_config.model_dump_json(exclude_none=True)) - f.write(yaml.dump(to_write, sort_keys=False)) - - # We first install the external APIs so that the build process can use them and discover the - # providers dependencies - if build_config.external_apis_dir: - cprint("Installing external APIs", color="yellow", file=sys.stderr) - external_apis = load_external_apis(build_config) - if external_apis: - # install the external APIs - packages = [] - for _, api_spec in external_apis.items(): - if api_spec.pip_packages: - packages.extend(api_spec.pip_packages) - cprint( - f"Installing {api_spec.name} with pip packages {api_spec.pip_packages}", - color="yellow", - file=sys.stderr, - ) - return_code = run_command(["uv", "pip", "install", *packages]) - if return_code != 0: - packages_str = ", ".join(packages) - raise RuntimeError( - f"Failed to install external APIs packages: {packages_str} (return code: {return_code})" - ) - - return_code = build_image( - build_config, - image_name, - distro_or_config=distro_name or config_path or str(build_file_path), - run_config=run_config_file.as_posix() if run_config_file else None, - ) - if return_code != 0: - raise RuntimeError(f"Failed to build image {image_name}") - - if distro_name: - # copy run.yaml from distribution to build_dir instead of generating it again - distro_path = importlib.resources.files("llama_stack") / f"distributions/{distro_name}/run.yaml" - run_config_file = build_dir / f"{distro_name}-run.yaml" - - with importlib.resources.as_file(distro_path) as path: - shutil.copy(path, run_config_file) - - cprint("Build Successful!", color="green", file=sys.stderr) - cprint(f"You can find the newly-built distribution here: {run_config_file}", color="blue", file=sys.stderr) - if build_config.image_type == LlamaStackImageType.VENV: - cprint( - "You can run the new Llama Stack distro (after activating " - + colored(image_name, "cyan") - + ") via: " - + colored(f"llama stack run {run_config_file}", "blue"), - color="green", - file=sys.stderr, - ) - elif build_config.image_type == LlamaStackImageType.CONTAINER: - cprint( - "You can run the container with: " - + colored( - f"docker run -p 8321:8321 -v ~/.llama:/root/.llama localhost/{image_name} --port 8321", "blue" - ), - color="green", - file=sys.stderr, - ) - return distro_path - else: - return _generate_run_config(build_config, build_dir, image_name) - - -def _run_distro_list_cmd() -> None: - headers = [ - "Distribution Name", - # "Providers", - 
"Description", - ] - - rows = [] - for distro_name, spec in available_distros_specs().items(): - rows.append( - [ - distro_name, - # json.dumps(spec.distribution_spec.providers, indent=2), - spec.distribution_spec.description, - ] - ) - print_table( - rows, - headers, - separate_rows=True, - ) diff --git a/llama_stack/cli/stack/build.py b/llama_stack/cli/stack/build.py deleted file mode 100644 index cbe8ed881..000000000 --- a/llama_stack/cli/stack/build.py +++ /dev/null @@ -1,106 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. -import argparse -import textwrap - -from llama_stack.cli.stack.utils import ImageType -from llama_stack.cli.subcommand import Subcommand -from llama_stack.log import get_logger - -logger = get_logger(__name__, category="cli") - - -class StackBuild(Subcommand): - def __init__(self, subparsers: argparse._SubParsersAction): - super().__init__() - self.parser = subparsers.add_parser( - "build", - prog="llama stack build", - description="[DEPRECATED] Build a Llama stack container. This command is deprecated and will be removed in a future release. Use `llama stack list-deps ' instead.", - formatter_class=argparse.ArgumentDefaultsHelpFormatter, - ) - self._add_arguments() - self.parser.set_defaults(func=self._run_stack_build_command) - - def _add_arguments(self): - self.parser.add_argument( - "--config", - type=str, - default=None, - help="Path to a config file to use for the build. You can find example configs in llama_stack.cores/**/build.yaml. If this argument is not provided, you will be prompted to enter information interactively", - ) - - self.parser.add_argument( - "--template", - type=str, - default=None, - help="""(deprecated) Name of the example template config to use for build. You may use `llama stack build --list-distros` to check out the available distributions""", - ) - self.parser.add_argument( - "--distro", - "--distribution", - dest="distribution", - type=str, - default=None, - help="""Name of the distribution to use for build. You may use `llama stack build --list-distros` to check out the available distributions""", - ) - - self.parser.add_argument( - "--list-distros", - "--list-distributions", - action="store_true", - dest="list_distros", - default=False, - help="Show the available distributions for building a Llama Stack distribution", - ) - - self.parser.add_argument( - "--image-type", - type=str, - help="Image Type to use for the build. If not specified, will use the image type from the template config.", - choices=[e.value for e in ImageType], - default=None, # no default so we can detect if a user specified --image-type and override image_type in the config - ) - - self.parser.add_argument( - "--image-name", - type=str, - help=textwrap.dedent( - f"""[for image-type={"|".join(e.value for e in ImageType)}] Name of the virtual environment to use for -the build. If not specified, currently active environment will be used if found. 
- """ - ), - default=None, - ) - self.parser.add_argument( - "--print-deps-only", - default=False, - action="store_true", - help="Print the dependencies for the stack only, without building the stack", - ) - - self.parser.add_argument( - "--run", - action="store_true", - default=False, - help="Run the stack after building using the same image type, name, and other applicable arguments", - ) - self.parser.add_argument( - "--providers", - type=str, - default=None, - help="Build a config for a list of providers and only those providers. This list is formatted like: api1=provider1,api2=provider2. Where there can be multiple providers per API.", - ) - - def _run_stack_build_command(self, args: argparse.Namespace) -> None: - logger.warning( - "The 'llama stack build' command is deprecated and will be removed in a future release. Please use 'llama stack list-deps'" - ) - # always keep implementation completely silo-ed away from CLI so CLI - # can be fast to load and reduces dependencies - from ._build import run_stack_build_command - - return run_stack_build_command(args) diff --git a/llama_stack/cli/stack/stack.py b/llama_stack/cli/stack/stack.py index fd0a4edf5..351da972f 100644 --- a/llama_stack/cli/stack/stack.py +++ b/llama_stack/cli/stack/stack.py @@ -11,7 +11,6 @@ from llama_stack.cli.stack.list_stacks import StackListBuilds from llama_stack.cli.stack.utils import print_subcommand_description from llama_stack.cli.subcommand import Subcommand -from .build import StackBuild from .list_apis import StackListApis from .list_deps import StackListDeps from .list_providers import StackListProviders @@ -41,7 +40,6 @@ class StackParser(Subcommand): # Add sub-commands StackListDeps.create(subparsers) - StackBuild.create(subparsers) StackListApis.create(subparsers) StackListProviders.create(subparsers) StackRun.create(subparsers) diff --git a/llama_stack/core/access_control/datatypes.py b/llama_stack/core/access_control/datatypes.py index c833ed51b..84beb8e15 100644 --- a/llama_stack/core/access_control/datatypes.py +++ b/llama_stack/core/access_control/datatypes.py @@ -41,7 +41,7 @@ class AccessRule(BaseModel): A rule defines a list of action either to permit or to forbid. It may specify a principal or a resource that must match for the rule to take effect. The resource to match should be specified in the form of a type qualified identifier, e.g. - model::my-model or vector_db::some-db, or a wildcard for all resources of a type, + model::my-model or vector_store::some-db, or a wildcard for all resources of a type, e.g. model::*. If the principal or resource are not specified, they will match all requests. @@ -79,9 +79,9 @@ class AccessRule(BaseModel): description: any user has read access to any resource created by a member of their team - forbid: actions: [create, read, delete] - resource: vector_db::* + resource: vector_store::* unless: user with admin in roles - description: only user with admin role can use vector_db resources + description: only user with admin role can use vector_store resources """ diff --git a/llama_stack/core/build_container.sh b/llama_stack/core/build_container.sh deleted file mode 100755 index 03ed846d9..000000000 --- a/llama_stack/core/build_container.sh +++ /dev/null @@ -1,410 +0,0 @@ -#!/usr/bin/env bash - -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
-
-LLAMA_STACK_DIR=${LLAMA_STACK_DIR:-}
-LLAMA_STACK_CLIENT_DIR=${LLAMA_STACK_CLIENT_DIR:-}
-
-TEST_PYPI_VERSION=${TEST_PYPI_VERSION:-}
-PYPI_VERSION=${PYPI_VERSION:-}
-BUILD_PLATFORM=${BUILD_PLATFORM:-}
-# This timeout (in seconds) is necessary when installing PyTorch via uv since it's likely to time out
-# Reference: https://github.com/astral-sh/uv/pull/1694
-UV_HTTP_TIMEOUT=${UV_HTTP_TIMEOUT:-500}
-
-# mounting is not supported by docker buildx, so we use COPY instead
-USE_COPY_NOT_MOUNT=${USE_COPY_NOT_MOUNT:-}
-# Path to the run.yaml file in the container
-RUN_CONFIG_PATH=/app/run.yaml
-
-BUILD_CONTEXT_DIR=$(pwd)
-
-set -euo pipefail
-
-# Define color codes
-RED='\033[0;31m'
-NC='\033[0m' # No Color
-
-# Usage function
-usage() {
-    echo "Usage: $0 --image-name <image_name> --container-base <container_base> --normal-deps <pip_dependencies> [--run-config <run_config>] [--external-provider-deps <external_provider_deps>] [--optional-deps <special_pip_deps>]"
-    echo "Example: $0 --image-name llama-stack-img --container-base python:3.12-slim --normal-deps 'numpy pandas' --run-config ./run.yaml --external-provider-deps 'foo' --optional-deps 'bar'"
-    exit 1
-}
-
-# Parse arguments
-image_name=""
-container_base=""
-normal_deps=""
-external_provider_deps=""
-optional_deps=""
-run_config=""
-distro_or_config=""
-
-while [[ $# -gt 0 ]]; do
-    key="$1"
-    case "$key" in
-    --image-name)
-        if [[ -z "$2" || "$2" == --* ]]; then
-            echo "Error: --image-name requires a string value" >&2
-            usage
-        fi
-        image_name="$2"
-        shift 2
-        ;;
-    --container-base)
-        if [[ -z "$2" || "$2" == --* ]]; then
-            echo "Error: --container-base requires a string value" >&2
-            usage
-        fi
-        container_base="$2"
-        shift 2
-        ;;
-    --normal-deps)
-        if [[ -z "$2" || "$2" == --* ]]; then
-            echo "Error: --normal-deps requires a string value" >&2
-            usage
-        fi
-        normal_deps="$2"
-        shift 2
-        ;;
-    --external-provider-deps)
-        if [[ -z "$2" || "$2" == --* ]]; then
-            echo "Error: --external-provider-deps requires a string value" >&2
-            usage
-        fi
-        external_provider_deps="$2"
-        shift 2
-        ;;
-    --optional-deps)
-        if [[ -z "$2" || "$2" == --* ]]; then
-            echo "Error: --optional-deps requires a string value" >&2
-            usage
-        fi
-        optional_deps="$2"
-        shift 2
-        ;;
-    --run-config)
-        if [[ -z "$2" || "$2" == --* ]]; then
-            echo "Error: --run-config requires a string value" >&2
-            usage
-        fi
-        run_config="$2"
-        shift 2
-        ;;
-    --distro-or-config)
-        if [[ -z "$2" || "$2" == --* ]]; then
-            echo "Error: --distro-or-config requires a string value" >&2
-            usage
-        fi
-        distro_or_config="$2"
-        shift 2
-        ;;
-    *)
-        echo "Unknown option: $1" >&2
-        usage
-        ;;
-    esac
-done
-
-# Check required arguments
-if [[ -z "$image_name" || -z "$container_base" || -z "$normal_deps" ]]; then
-    echo "Error: --image-name, --container-base, and --normal-deps are required." >&2
-    usage
-fi
-
-CONTAINER_BINARY=${CONTAINER_BINARY:-docker}
-CONTAINER_OPTS=${CONTAINER_OPTS:---progress=plain}
-TEMP_DIR=$(mktemp -d)
-SCRIPT_DIR=$(dirname "$(readlink -f "$0")")
-source "$SCRIPT_DIR/common.sh"
-
-add_to_container() {
-    output_file="$TEMP_DIR/Containerfile"
-    if [ -t 0 ]; then
-        printf '%s\n' "$1" >>"$output_file"
-    else
-        cat >>"$output_file"
-    fi
-}
-
-if ! is_command_available "$CONTAINER_BINARY"; then
-    printf "${RED}Error: ${CONTAINER_BINARY} command not found. Is ${CONTAINER_BINARY} installed and in your PATH?${NC}" >&2
-    exit 1
-fi
-
-if [[ $container_base == *"registry.access.redhat.com/ubi9"* ]]; then
-    add_to_container << EOF
-FROM $container_base
-WORKDIR /app
-
-# We install the Python 3.12 dev headers and build tools so that any
-# C-extension wheels (e.g. polyleven, faiss-cpu) can compile successfully.
-
-RUN dnf -y update && dnf install -y iputils git net-tools wget \
-    vim-minimal python3.12 python3.12-pip python3.12-wheel \
-    python3.12-setuptools python3.12-devel gcc gcc-c++ make && \
-    ln -s /bin/pip3.12 /bin/pip && ln -s /bin/python3.12 /bin/python && dnf clean all
-
-ENV UV_SYSTEM_PYTHON=1
-RUN pip install uv
-EOF
-else
-    add_to_container << EOF
-FROM $container_base
-WORKDIR /app
-
-RUN apt-get update && apt-get install -y \
-       iputils-ping net-tools iproute2 dnsutils telnet \
-       curl wget telnet git\
-       procps psmisc lsof \
-       traceroute \
-       bubblewrap \
-       gcc g++ \
-       && rm -rf /var/lib/apt/lists/*
-
-ENV UV_SYSTEM_PYTHON=1
-RUN pip install uv
-EOF
-fi
-
-# Add pip dependencies first since llama-stack is what will change most often
-# so we can reuse layers.
-if [ -n "$normal_deps" ]; then
-    read -ra pip_args <<< "$normal_deps"
-    quoted_deps=$(printf " %q" "${pip_args[@]}")
-    add_to_container << EOF
-RUN uv pip install --no-cache $quoted_deps
-EOF
-fi
-
-if [ -n "$optional_deps" ]; then
-    IFS='#' read -ra parts <<<"$optional_deps"
-    for part in "${parts[@]}"; do
-        read -ra pip_args <<< "$part"
-        quoted_deps=$(printf " %q" "${pip_args[@]}")
-        add_to_container <<EOF
-RUN uv pip install --no-cache $quoted_deps
-EOF
-    done
-fi
-
-if [ -n "$external_provider_deps" ]; then
-    IFS='#' read -ra parts <<<"$external_provider_deps"
-    for part in "${parts[@]}"; do
-        read -ra pip_args <<< "$part"
-        quoted_deps=$(printf " %q" "${pip_args[@]}")
-        add_to_container <<EOF
-RUN uv pip install --no-cache $quoted_deps
-RUN python3 - <<PYTHON | uv pip install --no-cache -r -
-import importlib
-import sys
-
-try:
-    package_name = '$part'.split('==')[0].split('>=')[0].split('<=')[0].split('!=')[0].split('<')[0].split('>')[0]
-    module = importlib.import_module(f'{package_name}.provider')
-    spec = module.get_provider_spec()
-    if hasattr(spec, 'pip_packages') and spec.pip_packages:
-        if isinstance(spec.pip_packages, (list, tuple)):
-            print('\n'.join(spec.pip_packages))
-except Exception as e:
-    print(f'Error getting provider spec for {package_name}: {e}', file=sys.stderr)
-PYTHON
-EOF
-    done
-fi
-
-get_python_cmd() {
-    if is_command_available python; then
-        echo "python"
-    elif is_command_available python3; then
-        echo "python3"
-    else
-        echo "Error: Neither python nor python3 is installed. Please install Python to continue."
>&2 - exit 1 - fi -} - -if [ -n "$run_config" ]; then - # Copy the run config to the build context since it's an absolute path - cp "$run_config" "$BUILD_CONTEXT_DIR/run.yaml" - - # Parse the run.yaml configuration to identify external provider directories - # If external providers are specified, copy their directory to the container - # and update the configuration to reference the new container path - python_cmd=$(get_python_cmd) - external_providers_dir=$($python_cmd -c "import yaml; config = yaml.safe_load(open('$run_config')); print(config.get('external_providers_dir') or '')") - external_providers_dir=$(eval echo "$external_providers_dir") - if [ -n "$external_providers_dir" ]; then - if [ -d "$external_providers_dir" ]; then - echo "Copying external providers directory: $external_providers_dir" - cp -r "$external_providers_dir" "$BUILD_CONTEXT_DIR/providers.d" - add_to_container << EOF -COPY providers.d /.llama/providers.d -EOF - fi - - # Edit the run.yaml file to change the external_providers_dir to /.llama/providers.d - if [ "$(uname)" = "Darwin" ]; then - sed -i.bak -e 's|external_providers_dir:.*|external_providers_dir: /.llama/providers.d|' "$BUILD_CONTEXT_DIR/run.yaml" - rm -f "$BUILD_CONTEXT_DIR/run.yaml.bak" - else - sed -i 's|external_providers_dir:.*|external_providers_dir: /.llama/providers.d|' "$BUILD_CONTEXT_DIR/run.yaml" - fi - fi - - # Copy run config into docker image - add_to_container << EOF -COPY run.yaml $RUN_CONFIG_PATH -EOF -fi - -stack_mount="/app/llama-stack-source" -client_mount="/app/llama-stack-client-source" - -install_local_package() { - local dir="$1" - local mount_point="$2" - local name="$3" - - if [ ! -d "$dir" ]; then - echo "${RED}Warning: $name is set but directory does not exist: $dir${NC}" >&2 - exit 1 - fi - - if [ "$USE_COPY_NOT_MOUNT" = "true" ]; then - add_to_container << EOF -COPY $dir $mount_point -EOF - fi - add_to_container << EOF -RUN uv pip install --no-cache -e $mount_point -EOF -} - - -if [ -n "$LLAMA_STACK_CLIENT_DIR" ]; then - install_local_package "$LLAMA_STACK_CLIENT_DIR" "$client_mount" "LLAMA_STACK_CLIENT_DIR" -fi - -if [ -n "$LLAMA_STACK_DIR" ]; then - install_local_package "$LLAMA_STACK_DIR" "$stack_mount" "LLAMA_STACK_DIR" -else - if [ -n "$TEST_PYPI_VERSION" ]; then - # these packages are damaged in test-pypi, so install them first - add_to_container << EOF -RUN uv pip install --no-cache fastapi libcst -EOF - add_to_container << EOF -RUN uv pip install --no-cache --extra-index-url https://test.pypi.org/simple/ \ - --index-strategy unsafe-best-match \ - llama-stack==$TEST_PYPI_VERSION - -EOF - else - if [ -n "$PYPI_VERSION" ]; then - SPEC_VERSION="llama-stack==${PYPI_VERSION}" - else - SPEC_VERSION="llama-stack" - fi - add_to_container << EOF -RUN uv pip install --no-cache $SPEC_VERSION -EOF - fi -fi - -# remove uv after installation - add_to_container << EOF -RUN pip uninstall -y uv -EOF - -# If a run config is provided, we use the llama stack CLI -if [[ -n "$run_config" ]]; then - add_to_container << EOF -ENTRYPOINT ["llama", "stack", "run", "$RUN_CONFIG_PATH"] -EOF -elif [[ "$distro_or_config" != *.yaml ]]; then - add_to_container << EOF -ENTRYPOINT ["llama", "stack", "run", "$distro_or_config"] -EOF -fi - -# Add other require item commands genearic to all containers -add_to_container << EOF - -RUN mkdir -p /.llama /.cache && chmod -R g+rw /.llama /.cache && (chmod -R g+rw /app 2>/dev/null || true) -EOF - -printf "Containerfile created successfully in %s/Containerfile\n\n" "$TEMP_DIR" -cat "$TEMP_DIR"/Containerfile 
-printf "\n" - -# Start building the CLI arguments -CLI_ARGS=() - -# Read CONTAINER_OPTS and put it in an array -read -ra CLI_ARGS <<< "$CONTAINER_OPTS" - -if [ "$USE_COPY_NOT_MOUNT" != "true" ]; then - if [ -n "$LLAMA_STACK_DIR" ]; then - CLI_ARGS+=("-v" "$(readlink -f "$LLAMA_STACK_DIR"):$stack_mount") - fi - if [ -n "$LLAMA_STACK_CLIENT_DIR" ]; then - CLI_ARGS+=("-v" "$(readlink -f "$LLAMA_STACK_CLIENT_DIR"):$client_mount") - fi -fi - -if is_command_available selinuxenabled && selinuxenabled; then - # Disable SELinux labels -- we don't want to relabel the llama-stack source dir - CLI_ARGS+=("--security-opt" "label=disable") -fi - -# Set version tag based on PyPI version -if [ -n "$PYPI_VERSION" ]; then - version_tag="$PYPI_VERSION" -elif [ -n "$TEST_PYPI_VERSION" ]; then - version_tag="test-$TEST_PYPI_VERSION" -elif [[ -n "$LLAMA_STACK_DIR" || -n "$LLAMA_STACK_CLIENT_DIR" ]]; then - version_tag="dev" -else - URL="https://pypi.org/pypi/llama-stack/json" - version_tag=$(curl -s $URL | jq -r '.info.version') -fi - -# Add version tag to image name -image_tag="$image_name:$version_tag" - -# Detect platform architecture -ARCH=$(uname -m) -if [ -n "$BUILD_PLATFORM" ]; then - CLI_ARGS+=("--platform" "$BUILD_PLATFORM") -elif [ "$ARCH" = "arm64" ] || [ "$ARCH" = "aarch64" ]; then - CLI_ARGS+=("--platform" "linux/arm64") -elif [ "$ARCH" = "x86_64" ]; then - CLI_ARGS+=("--platform" "linux/amd64") -else - echo "Unsupported architecture: $ARCH" - exit 1 -fi - -echo "PWD: $(pwd)" -echo "Containerfile: $TEMP_DIR/Containerfile" -set -x - -$CONTAINER_BINARY build \ - "${CLI_ARGS[@]}" \ - -t "$image_tag" \ - -f "$TEMP_DIR/Containerfile" \ - "$BUILD_CONTEXT_DIR" - -# clean up tmp/configs -rm -rf "$BUILD_CONTEXT_DIR/run.yaml" "$TEMP_DIR" -set +x - -echo "Success!" diff --git a/llama_stack/core/build_venv.sh b/llama_stack/core/build_venv.sh deleted file mode 100755 index 04927d71e..000000000 --- a/llama_stack/core/build_venv.sh +++ /dev/null @@ -1,220 +0,0 @@ -#!/bin/bash - -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
-
-LLAMA_STACK_DIR=${LLAMA_STACK_DIR:-}
-LLAMA_STACK_CLIENT_DIR=${LLAMA_STACK_CLIENT_DIR:-}
-TEST_PYPI_VERSION=${TEST_PYPI_VERSION:-}
-# This timeout (in seconds) is necessary when installing PyTorch via uv since it's likely to time out
-# Reference: https://github.com/astral-sh/uv/pull/1694
-UV_HTTP_TIMEOUT=${UV_HTTP_TIMEOUT:-500}
-UV_SYSTEM_PYTHON=${UV_SYSTEM_PYTHON:-}
-VIRTUAL_ENV=${VIRTUAL_ENV:-}
-
-set -euo pipefail
-
-# Define color codes
-RED='\033[0;31m'
-NC='\033[0m' # No Color
-
-SCRIPT_DIR=$(dirname "$(readlink -f "$0")")
-source "$SCRIPT_DIR/common.sh"
-
-# Usage function
-usage() {
-    echo "Usage: $0 --env-name <env_name> --normal-deps <pip_dependencies> [--external-provider-deps <external_provider_deps>] [--optional-deps <special_pip_deps>]"
-    echo "Example: $0 --env-name mybuild --normal-deps 'numpy pandas scipy' --external-provider-deps 'foo' --optional-deps 'bar'"
-    exit 1
-}
-
-# Parse arguments
-env_name=""
-normal_deps=""
-external_provider_deps=""
-optional_deps=""
-
-while [[ $# -gt 0 ]]; do
-    key="$1"
-    case "$key" in
-    --env-name)
-        if [[ -z "$2" || "$2" == --* ]]; then
-            echo "Error: --env-name requires a string value" >&2
-            usage
-        fi
-        env_name="$2"
-        shift 2
-        ;;
-    --normal-deps)
-        if [[ -z "$2" || "$2" == --* ]]; then
-            echo "Error: --normal-deps requires a string value" >&2
-            usage
-        fi
-        normal_deps="$2"
-        shift 2
-        ;;
-    --external-provider-deps)
-        if [[ -z "$2" || "$2" == --* ]]; then
-            echo "Error: --external-provider-deps requires a string value" >&2
-            usage
-        fi
-        external_provider_deps="$2"
-        shift 2
-        ;;
-    --optional-deps)
-        if [[ -z "$2" || "$2" == --* ]]; then
-            echo "Error: --optional-deps requires a string value" >&2
-            usage
-        fi
-        optional_deps="$2"
-        shift 2
-        ;;
-    *)
-        echo "Unknown option: $1" >&2
-        usage
-        ;;
-    esac
-done
-
-# Check required arguments
-if [[ -z "$env_name" || -z "$normal_deps" ]]; then
-    echo "Error: --env-name and --normal-deps are required." >&2
-    usage
-fi
-
-if [ -n "$LLAMA_STACK_DIR" ]; then
-    echo "Using llama-stack-dir=$LLAMA_STACK_DIR"
-fi
-if [ -n "$LLAMA_STACK_CLIENT_DIR" ]; then
-    echo "Using llama-stack-client-dir=$LLAMA_STACK_CLIENT_DIR"
-fi
-
-ENVNAME=""
-
-# pre-run checks to make sure we can proceed with the installation
-pre_run_checks() {
-    local env_name="$1"
-
-    if ! is_command_available uv; then
-        echo "uv is not installed, trying to install it."
-        if ! is_command_available pip; then
-            echo "pip is not installed, cannot automatically install 'uv'."
-            echo "Follow this link to install it:"
-            echo "https://docs.astral.sh/uv/getting-started/installation/"
-            exit 1
-        else
-            pip install uv
-        fi
-    fi
-
-    # checking if an environment with the same name already exists
-    if [ -d "$env_name" ]; then
-        echo "Environment '$env_name' already exists, re-using it."
- fi -} - -run() { - # Use only global variables set by flag parser - if [ -n "$UV_SYSTEM_PYTHON" ] || [ "$env_name" == "__system__" ]; then - echo "Installing dependencies in system Python environment" - export UV_SYSTEM_PYTHON=1 - elif [ "$VIRTUAL_ENV" == "$env_name" ]; then - echo "Virtual environment $env_name is already active" - else - echo "Using virtual environment $env_name" - uv venv "$env_name" - source "$env_name/bin/activate" - fi - - if [ -n "$TEST_PYPI_VERSION" ]; then - uv pip install fastapi libcst - uv pip install --extra-index-url https://test.pypi.org/simple/ \ - --index-strategy unsafe-best-match \ - llama-stack=="$TEST_PYPI_VERSION" \ - $normal_deps - if [ -n "$optional_deps" ]; then - IFS='#' read -ra parts <<<"$optional_deps" - for part in "${parts[@]}"; do - echo "$part" - uv pip install $part - done - fi - if [ -n "$external_provider_deps" ]; then - IFS='#' read -ra parts <<<"$external_provider_deps" - for part in "${parts[@]}"; do - echo "$part" - uv pip install "$part" - done - fi - else - if [ -n "$LLAMA_STACK_DIR" ]; then - # only warn if DIR does not start with "git+" - if [ ! -d "$LLAMA_STACK_DIR" ] && [[ "$LLAMA_STACK_DIR" != git+* ]]; then - printf "${RED}Warning: LLAMA_STACK_DIR is set but directory does not exist: %s${NC}\n" "$LLAMA_STACK_DIR" >&2 - exit 1 - fi - printf "Installing from LLAMA_STACK_DIR: %s\n" "$LLAMA_STACK_DIR" - # editable only if LLAMA_STACK_DIR does not start with "git+" - if [[ "$LLAMA_STACK_DIR" != git+* ]]; then - EDITABLE="-e" - else - EDITABLE="" - fi - uv pip install --no-cache-dir $EDITABLE "$LLAMA_STACK_DIR" - else - uv pip install --no-cache-dir llama-stack - fi - - if [ -n "$LLAMA_STACK_CLIENT_DIR" ]; then - # only warn if DIR does not start with "git+" - if [ ! -d "$LLAMA_STACK_CLIENT_DIR" ] && [[ "$LLAMA_STACK_CLIENT_DIR" != git+* ]]; then - printf "${RED}Warning: LLAMA_STACK_CLIENT_DIR is set but directory does not exist: %s${NC}\n" "$LLAMA_STACK_CLIENT_DIR" >&2 - exit 1 - fi - printf "Installing from LLAMA_STACK_CLIENT_DIR: %s\n" "$LLAMA_STACK_CLIENT_DIR" - # editable only if LLAMA_STACK_CLIENT_DIR does not start with "git+" - if [[ "$LLAMA_STACK_CLIENT_DIR" != git+* ]]; then - EDITABLE="-e" - else - EDITABLE="" - fi - uv pip install --no-cache-dir $EDITABLE "$LLAMA_STACK_CLIENT_DIR" - fi - - printf "Installing pip dependencies\n" - uv pip install $normal_deps - if [ -n "$optional_deps" ]; then - IFS='#' read -ra parts <<<"$optional_deps" - for part in "${parts[@]}"; do - echo "Installing special provider module: $part" - uv pip install $part - done - fi - if [ -n "$external_provider_deps" ]; then - IFS='#' read -ra parts <<<"$external_provider_deps" - for part in "${parts[@]}"; do - echo "Installing external provider module: $part" - uv pip install "$part" - echo "Getting provider spec for module: $part and installing dependencies" - package_name=$(echo "$part" | sed 's/[<>=!].*//') - python3 -c " -import importlib -import sys -try: - module = importlib.import_module(f'$package_name.provider') - spec = module.get_provider_spec() - if hasattr(spec, 'pip_packages') and spec.pip_packages: - print('\\n'.join(spec.pip_packages)) -except Exception as e: - print(f'Error getting provider spec for $package_name: {e}', file=sys.stderr) -" | uv pip install -r - - done - fi - fi -} - -pre_run_checks "$env_name" -run diff --git a/llama_stack/core/datatypes.py b/llama_stack/core/datatypes.py index e8cb36a02..5f4775d87 100644 --- a/llama_stack/core/datatypes.py +++ b/llama_stack/core/datatypes.py @@ -23,8 +23,8 @@ from 
llama_stack.apis.scoring import Scoring from llama_stack.apis.scoring_functions import ScoringFn, ScoringFnInput from llama_stack.apis.shields import Shield, ShieldInput from llama_stack.apis.tools import ToolGroup, ToolGroupInput, ToolRuntime -from llama_stack.apis.vector_dbs import VectorDB, VectorDBInput from llama_stack.apis.vector_io import VectorIO +from llama_stack.apis.vector_stores import VectorStore, VectorStoreInput from llama_stack.core.access_control.datatypes import AccessRule from llama_stack.core.storage.datatypes import ( KVStoreReference, @@ -71,7 +71,7 @@ class ShieldWithOwner(Shield, ResourceWithOwner): pass -class VectorDBWithOwner(VectorDB, ResourceWithOwner): +class VectorStoreWithOwner(VectorStore, ResourceWithOwner): pass @@ -91,12 +91,12 @@ class ToolGroupWithOwner(ToolGroup, ResourceWithOwner): pass -RoutableObject = Model | Shield | VectorDB | Dataset | ScoringFn | Benchmark | ToolGroup +RoutableObject = Model | Shield | VectorStore | Dataset | ScoringFn | Benchmark | ToolGroup RoutableObjectWithProvider = Annotated[ ModelWithOwner | ShieldWithOwner - | VectorDBWithOwner + | VectorStoreWithOwner | DatasetWithOwner | ScoringFnWithOwner | BenchmarkWithOwner @@ -427,7 +427,7 @@ class RegisteredResources(BaseModel): models: list[ModelInput] = Field(default_factory=list) shields: list[ShieldInput] = Field(default_factory=list) - vector_dbs: list[VectorDBInput] = Field(default_factory=list) + vector_stores: list[VectorStoreInput] = Field(default_factory=list) datasets: list[DatasetInput] = Field(default_factory=list) scoring_fns: list[ScoringFnInput] = Field(default_factory=list) benchmarks: list[BenchmarkInput] = Field(default_factory=list) diff --git a/llama_stack/core/distribution.py b/llama_stack/core/distribution.py index 59461f5d6..82cbcf984 100644 --- a/llama_stack/core/distribution.py +++ b/llama_stack/core/distribution.py @@ -64,7 +64,7 @@ def builtin_automatically_routed_apis() -> list[AutoRoutedApiInfo]: router_api=Api.tool_runtime, ), AutoRoutedApiInfo( - routing_table_api=Api.vector_dbs, + routing_table_api=Api.vector_stores, router_api=Api.vector_io, ), ] diff --git a/llama_stack/core/library_client.py b/llama_stack/core/library_client.py index 1179075cd..328ca9c6e 100644 --- a/llama_stack/core/library_client.py +++ b/llama_stack/core/library_client.py @@ -278,7 +278,7 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient): else: prefix = "!" 
if in_notebook() else "" cprint( - f"Please run:\n\n{prefix}llama stack build --distro {self.config_path_or_distro_name} --image-type venv\n\n", + f"Please run:\n\n{prefix}llama stack list-deps {self.config_path_or_distro_name} | xargs -L1 uv pip install\n\n", "yellow", file=sys.stderr, ) diff --git a/llama_stack/core/resolver.py b/llama_stack/core/resolver.py index 6e1843870..0b63815ea 100644 --- a/llama_stack/core/resolver.py +++ b/llama_stack/core/resolver.py @@ -29,8 +29,8 @@ from llama_stack.apis.scoring_functions import ScoringFunctions from llama_stack.apis.shields import Shields from llama_stack.apis.telemetry import Telemetry from llama_stack.apis.tools import ToolGroups, ToolRuntime -from llama_stack.apis.vector_dbs import VectorDBs from llama_stack.apis.vector_io import VectorIO +from llama_stack.apis.vector_stores import VectorStore from llama_stack.apis.version import LLAMA_STACK_API_V1ALPHA from llama_stack.core.client import get_client_impl from llama_stack.core.datatypes import ( @@ -82,7 +82,7 @@ def api_protocol_map(external_apis: dict[Api, ExternalApiSpec] | None = None) -> Api.inspect: Inspect, Api.batches: Batches, Api.vector_io: VectorIO, - Api.vector_dbs: VectorDBs, + Api.vector_stores: VectorStore, Api.models: Models, Api.safety: Safety, Api.shields: Shields, diff --git a/llama_stack/core/routers/__init__.py b/llama_stack/core/routers/__init__.py index df4df0463..20c17e59d 100644 --- a/llama_stack/core/routers/__init__.py +++ b/llama_stack/core/routers/__init__.py @@ -29,7 +29,7 @@ async def get_routing_table_impl( from ..routing_tables.scoring_functions import ScoringFunctionsRoutingTable from ..routing_tables.shields import ShieldsRoutingTable from ..routing_tables.toolgroups import ToolGroupsRoutingTable - from ..routing_tables.vector_dbs import VectorDBsRoutingTable + from ..routing_tables.vector_stores import VectorStoresRoutingTable api_to_tables = { "models": ModelsRoutingTable, @@ -38,7 +38,7 @@ async def get_routing_table_impl( "scoring_functions": ScoringFunctionsRoutingTable, "benchmarks": BenchmarksRoutingTable, "tool_groups": ToolGroupsRoutingTable, - "vector_dbs": VectorDBsRoutingTable, + "vector_stores": VectorStoresRoutingTable, } if api.value not in api_to_tables: diff --git a/llama_stack/core/routers/tool_runtime.py b/llama_stack/core/routers/tool_runtime.py index ad82293e5..7c5bb25c6 100644 --- a/llama_stack/core/routers/tool_runtime.py +++ b/llama_stack/core/routers/tool_runtime.py @@ -8,16 +8,8 @@ from typing import Any from llama_stack.apis.common.content_types import ( URL, - InterleavedContent, -) -from llama_stack.apis.tools import ( - ListToolDefsResponse, - RAGDocument, - RAGQueryConfig, - RAGQueryResult, - RAGToolRuntime, - ToolRuntime, ) +from llama_stack.apis.tools import ListToolDefsResponse, ToolRuntime from llama_stack.log import get_logger from ..routing_tables.toolgroups import ToolGroupsRoutingTable @@ -26,36 +18,6 @@ logger = get_logger(name=__name__, category="core::routers") class ToolRuntimeRouter(ToolRuntime): - class RagToolImpl(RAGToolRuntime): - def __init__( - self, - routing_table: ToolGroupsRoutingTable, - ) -> None: - logger.debug("Initializing ToolRuntimeRouter.RagToolImpl") - self.routing_table = routing_table - - async def query( - self, - content: InterleavedContent, - vector_db_ids: list[str], - query_config: RAGQueryConfig | None = None, - ) -> RAGQueryResult: - logger.debug(f"ToolRuntimeRouter.RagToolImpl.query: {vector_db_ids}") - provider = await self.routing_table.get_provider_impl("knowledge_search") - 
return await provider.query(content, vector_db_ids, query_config) - - async def insert( - self, - documents: list[RAGDocument], - vector_db_id: str, - chunk_size_in_tokens: int = 512, - ) -> None: - logger.debug( - f"ToolRuntimeRouter.RagToolImpl.insert: {vector_db_id}, {len(documents)} documents, chunk_size={chunk_size_in_tokens}" - ) - provider = await self.routing_table.get_provider_impl("insert_into_memory") - return await provider.insert(documents, vector_db_id, chunk_size_in_tokens) - def __init__( self, routing_table: ToolGroupsRoutingTable, @@ -63,11 +25,6 @@ class ToolRuntimeRouter(ToolRuntime): logger.debug("Initializing ToolRuntimeRouter") self.routing_table = routing_table - # HACK ALERT this should be in sync with "get_all_api_endpoints()" - self.rag_tool = self.RagToolImpl(routing_table) - for method in ("query", "insert"): - setattr(self, f"rag_tool.{method}", getattr(self.rag_tool, method)) - async def initialize(self) -> None: logger.debug("ToolRuntimeRouter.initialize") pass diff --git a/llama_stack/core/routers/vector_io.py b/llama_stack/core/routers/vector_io.py index bfc5f7164..2b1701dc2 100644 --- a/llama_stack/core/routers/vector_io.py +++ b/llama_stack/core/routers/vector_io.py @@ -71,25 +71,6 @@ class VectorIORouter(VectorIO): raise ValueError(f"Embedding model '{embedding_model_id}' not found or not an embedding model") - async def register_vector_db( - self, - vector_db_id: str, - embedding_model: str, - embedding_dimension: int | None = 384, - provider_id: str | None = None, - vector_db_name: str | None = None, - provider_vector_db_id: str | None = None, - ) -> None: - logger.debug(f"VectorIORouter.register_vector_db: {vector_db_id}, {embedding_model}") - await self.routing_table.register_vector_db( - vector_db_id, - embedding_model, - embedding_dimension, - provider_id, - vector_db_name, - provider_vector_db_id, - ) - async def insert_chunks( self, vector_db_id: str, @@ -165,22 +146,22 @@ class VectorIORouter(VectorIO): else: provider_id = list(self.routing_table.impls_by_provider_id.keys())[0] - vector_db_id = f"vs_{uuid.uuid4()}" - registered_vector_db = await self.routing_table.register_vector_db( - vector_db_id=vector_db_id, + vector_store_id = f"vs_{uuid.uuid4()}" + registered_vector_store = await self.routing_table.register_vector_store( + vector_store_id=vector_store_id, embedding_model=embedding_model, embedding_dimension=embedding_dimension, provider_id=provider_id, - provider_vector_db_id=vector_db_id, - vector_db_name=params.name, + provider_vector_store_id=vector_store_id, + vector_store_name=params.name, ) - provider = await self.routing_table.get_provider_impl(registered_vector_db.identifier) + provider = await self.routing_table.get_provider_impl(registered_vector_store.identifier) - # Update model_extra with registered values so provider uses the already-registered vector_db + # Update model_extra with registered values so provider uses the already-registered vector_store if params.model_extra is None: params.model_extra = {} - params.model_extra["provider_vector_db_id"] = registered_vector_db.provider_resource_id - params.model_extra["provider_id"] = registered_vector_db.provider_id + params.model_extra["provider_vector_store_id"] = registered_vector_store.provider_resource_id + params.model_extra["provider_id"] = registered_vector_store.provider_id if embedding_model is not None: params.model_extra["embedding_model"] = embedding_model if embedding_dimension is not None: @@ -198,15 +179,15 @@ class VectorIORouter(VectorIO): 
logger.debug(f"VectorIORouter.openai_list_vector_stores: limit={limit}") # Route to default provider for now - could aggregate from all providers in the future # call retrieve on each vector dbs to get list of vector stores - vector_dbs = await self.routing_table.get_all_with_type("vector_db") + vector_stores = await self.routing_table.get_all_with_type("vector_store") all_stores = [] - for vector_db in vector_dbs: + for vector_store in vector_stores: try: - provider = await self.routing_table.get_provider_impl(vector_db.identifier) - vector_store = await provider.openai_retrieve_vector_store(vector_db.identifier) + provider = await self.routing_table.get_provider_impl(vector_store.identifier) + vector_store = await provider.openai_retrieve_vector_store(vector_store.identifier) all_stores.append(vector_store) except Exception as e: - logger.error(f"Error retrieving vector store {vector_db.identifier}: {e}") + logger.error(f"Error retrieving vector store {vector_store.identifier}: {e}") continue # Sort by created_at diff --git a/llama_stack/core/routing_tables/common.py b/llama_stack/core/routing_tables/common.py index 087483bb6..d6faf93c5 100644 --- a/llama_stack/core/routing_tables/common.py +++ b/llama_stack/core/routing_tables/common.py @@ -41,7 +41,7 @@ async def register_object_with_provider(obj: RoutableObject, p: Any) -> Routable elif api == Api.safety: return await p.register_shield(obj) elif api == Api.vector_io: - return await p.register_vector_db(obj) + return await p.register_vector_store(obj) elif api == Api.datasetio: return await p.register_dataset(obj) elif api == Api.scoring: @@ -57,7 +57,7 @@ async def register_object_with_provider(obj: RoutableObject, p: Any) -> Routable async def unregister_object_from_provider(obj: RoutableObject, p: Any) -> None: api = get_impl_api(p) if api == Api.vector_io: - return await p.unregister_vector_db(obj.identifier) + return await p.unregister_vector_store(obj.identifier) elif api == Api.inference: return await p.unregister_model(obj.identifier) elif api == Api.safety: @@ -108,7 +108,7 @@ class CommonRoutingTableImpl(RoutingTable): elif api == Api.safety: p.shield_store = self elif api == Api.vector_io: - p.vector_db_store = self + p.vector_store_store = self elif api == Api.datasetio: p.dataset_store = self elif api == Api.scoring: @@ -134,15 +134,15 @@ class CommonRoutingTableImpl(RoutingTable): from .scoring_functions import ScoringFunctionsRoutingTable from .shields import ShieldsRoutingTable from .toolgroups import ToolGroupsRoutingTable - from .vector_dbs import VectorDBsRoutingTable + from .vector_stores import VectorStoresRoutingTable def apiname_object(): if isinstance(self, ModelsRoutingTable): return ("Inference", "model") elif isinstance(self, ShieldsRoutingTable): return ("Safety", "shield") - elif isinstance(self, VectorDBsRoutingTable): - return ("VectorIO", "vector_db") + elif isinstance(self, VectorStoresRoutingTable): + return ("VectorIO", "vector_store") elif isinstance(self, DatasetsRoutingTable): return ("DatasetIO", "dataset") elif isinstance(self, ScoringFunctionsRoutingTable): diff --git a/llama_stack/core/routing_tables/vector_dbs.py b/llama_stack/core/routing_tables/vector_stores.py similarity index 70% rename from llama_stack/core/routing_tables/vector_dbs.py rename to llama_stack/core/routing_tables/vector_stores.py index e87fb61c6..c6c80a01e 100644 --- a/llama_stack/core/routing_tables/vector_dbs.py +++ b/llama_stack/core/routing_tables/vector_stores.py @@ -6,15 +6,12 @@ from typing import Any -from pydantic 
import TypeAdapter - from llama_stack.apis.common.errors import ModelNotFoundError, ModelTypeError from llama_stack.apis.models import ModelType from llama_stack.apis.resource import ResourceType -# Removed VectorDBs import to avoid exposing public API +# Removed VectorStores import to avoid exposing public API from llama_stack.apis.vector_io.vector_io import ( - OpenAICreateVectorStoreRequestWithExtraBody, SearchRankingOptions, VectorStoreChunkingStrategy, VectorStoreDeleteResponse, @@ -26,7 +23,7 @@ from llama_stack.apis.vector_io.vector_io import ( VectorStoreSearchResponsePage, ) from llama_stack.core.datatypes import ( - VectorDBWithOwner, + VectorStoreWithOwner, ) from llama_stack.log import get_logger @@ -35,23 +32,23 @@ from .common import CommonRoutingTableImpl, lookup_model logger = get_logger(name=__name__, category="core::routing_tables") -class VectorDBsRoutingTable(CommonRoutingTableImpl): - """Internal routing table for vector_db operations. +class VectorStoresRoutingTable(CommonRoutingTableImpl): + """Internal routing table for vector_store operations. - Does not inherit from VectorDBs to avoid exposing public API endpoints. + Does not inherit from VectorStores to avoid exposing public API endpoints. Only provides internal routing functionality for VectorIORouter. """ # Internal methods only - no public API exposure - async def register_vector_db( + async def register_vector_store( self, - vector_db_id: str, + vector_store_id: str, embedding_model: str, embedding_dimension: int | None = 384, provider_id: str | None = None, - provider_vector_db_id: str | None = None, - vector_db_name: str | None = None, + provider_vector_store_id: str | None = None, + vector_store_name: str | None = None, ) -> Any: if provider_id is None: if len(self.impls_by_provider_id) > 0: @@ -67,52 +64,24 @@ class VectorDBsRoutingTable(CommonRoutingTableImpl): raise ModelNotFoundError(embedding_model) if model.model_type != ModelType.embedding: raise ModelTypeError(embedding_model, model.model_type, ModelType.embedding) - if "embedding_dimension" not in model.metadata: - raise ValueError(f"Model {embedding_model} does not have an embedding dimension") - try: - provider = self.impls_by_provider_id[provider_id] - except KeyError: - available_providers = list(self.impls_by_provider_id.keys()) - raise ValueError( - f"Provider '{provider_id}' not found in routing table. Available providers: {available_providers}" - ) from None - logger.warning( - "VectorDB is being deprecated in future releases in favor of VectorStore. Please migrate your usage accordingly." - ) - request = OpenAICreateVectorStoreRequestWithExtraBody( - name=vector_db_name or vector_db_id, - embedding_model=embedding_model, - embedding_dimension=model.metadata["embedding_dimension"], + vector_store = VectorStoreWithOwner( + identifier=vector_store_id, + type=ResourceType.vector_store.value, provider_id=provider_id, - provider_vector_db_id=provider_vector_db_id, + provider_resource_id=provider_vector_store_id, + embedding_model=embedding_model, + embedding_dimension=embedding_dimension, + vector_store_name=vector_store_name, ) - vector_store = await provider.openai_create_vector_store(request) - - vector_store_id = vector_store.id - actual_provider_vector_db_id = provider_vector_db_id or vector_store_id - logger.warning( - f"Ignoring vector_db_id {vector_db_id} and using vector_store_id {vector_store_id} instead. 
Setting VectorDB {vector_db_id} to VectorDB.vector_db_name" - ) - - vector_db_data = { - "identifier": vector_store_id, - "type": ResourceType.vector_db.value, - "provider_id": provider_id, - "provider_resource_id": actual_provider_vector_db_id, - "embedding_model": embedding_model, - "embedding_dimension": model.metadata["embedding_dimension"], - "vector_db_name": vector_store.name, - } - vector_db = TypeAdapter(VectorDBWithOwner).validate_python(vector_db_data) - await self.register_object(vector_db) - return vector_db + await self.register_object(vector_store) + return vector_store async def openai_retrieve_vector_store( self, vector_store_id: str, ) -> VectorStoreObject: - await self.assert_action_allowed("read", "vector_db", vector_store_id) + await self.assert_action_allowed("read", "vector_store", vector_store_id) provider = await self.get_provider_impl(vector_store_id) return await provider.openai_retrieve_vector_store(vector_store_id) @@ -123,7 +92,7 @@ class VectorDBsRoutingTable(CommonRoutingTableImpl): expires_after: dict[str, Any] | None = None, metadata: dict[str, Any] | None = None, ) -> VectorStoreObject: - await self.assert_action_allowed("update", "vector_db", vector_store_id) + await self.assert_action_allowed("update", "vector_store", vector_store_id) provider = await self.get_provider_impl(vector_store_id) return await provider.openai_update_vector_store( vector_store_id=vector_store_id, @@ -136,18 +105,18 @@ class VectorDBsRoutingTable(CommonRoutingTableImpl): self, vector_store_id: str, ) -> VectorStoreDeleteResponse: - await self.assert_action_allowed("delete", "vector_db", vector_store_id) + await self.assert_action_allowed("delete", "vector_store", vector_store_id) provider = await self.get_provider_impl(vector_store_id) result = await provider.openai_delete_vector_store(vector_store_id) - await self.unregister_vector_db(vector_store_id) + await self.unregister_vector_store(vector_store_id) return result - async def unregister_vector_db(self, vector_store_id: str) -> None: + async def unregister_vector_store(self, vector_store_id: str) -> None: """Remove the vector store from the routing table registry.""" try: - vector_db_obj = await self.get_object_by_identifier("vector_db", vector_store_id) - if vector_db_obj: - await self.unregister_object(vector_db_obj) + vector_store_obj = await self.get_object_by_identifier("vector_store", vector_store_id) + if vector_store_obj: + await self.unregister_object(vector_store_obj) except Exception as e: # Log the error but don't fail the operation logger.warning(f"Failed to unregister vector store {vector_store_id} from routing table: {e}") @@ -162,7 +131,7 @@ class VectorDBsRoutingTable(CommonRoutingTableImpl): rewrite_query: bool | None = False, search_mode: str | None = "vector", ) -> VectorStoreSearchResponsePage: - await self.assert_action_allowed("read", "vector_db", vector_store_id) + await self.assert_action_allowed("read", "vector_store", vector_store_id) provider = await self.get_provider_impl(vector_store_id) return await provider.openai_search_vector_store( vector_store_id=vector_store_id, @@ -181,7 +150,7 @@ class VectorDBsRoutingTable(CommonRoutingTableImpl): attributes: dict[str, Any] | None = None, chunking_strategy: VectorStoreChunkingStrategy | None = None, ) -> VectorStoreFileObject: - await self.assert_action_allowed("update", "vector_db", vector_store_id) + await self.assert_action_allowed("update", "vector_store", vector_store_id) provider = await self.get_provider_impl(vector_store_id) return await 
provider.openai_attach_file_to_vector_store( vector_store_id=vector_store_id, @@ -199,7 +168,7 @@ class VectorDBsRoutingTable(CommonRoutingTableImpl): before: str | None = None, filter: VectorStoreFileStatus | None = None, ) -> list[VectorStoreFileObject]: - await self.assert_action_allowed("read", "vector_db", vector_store_id) + await self.assert_action_allowed("read", "vector_store", vector_store_id) provider = await self.get_provider_impl(vector_store_id) return await provider.openai_list_files_in_vector_store( vector_store_id=vector_store_id, @@ -215,7 +184,7 @@ class VectorDBsRoutingTable(CommonRoutingTableImpl): vector_store_id: str, file_id: str, ) -> VectorStoreFileObject: - await self.assert_action_allowed("read", "vector_db", vector_store_id) + await self.assert_action_allowed("read", "vector_store", vector_store_id) provider = await self.get_provider_impl(vector_store_id) return await provider.openai_retrieve_vector_store_file( vector_store_id=vector_store_id, @@ -227,7 +196,7 @@ class VectorDBsRoutingTable(CommonRoutingTableImpl): vector_store_id: str, file_id: str, ) -> VectorStoreFileContentsResponse: - await self.assert_action_allowed("read", "vector_db", vector_store_id) + await self.assert_action_allowed("read", "vector_store", vector_store_id) provider = await self.get_provider_impl(vector_store_id) return await provider.openai_retrieve_vector_store_file_contents( vector_store_id=vector_store_id, @@ -240,7 +209,7 @@ class VectorDBsRoutingTable(CommonRoutingTableImpl): file_id: str, attributes: dict[str, Any], ) -> VectorStoreFileObject: - await self.assert_action_allowed("update", "vector_db", vector_store_id) + await self.assert_action_allowed("update", "vector_store", vector_store_id) provider = await self.get_provider_impl(vector_store_id) return await provider.openai_update_vector_store_file( vector_store_id=vector_store_id, @@ -253,7 +222,7 @@ class VectorDBsRoutingTable(CommonRoutingTableImpl): vector_store_id: str, file_id: str, ) -> VectorStoreFileDeleteResponse: - await self.assert_action_allowed("delete", "vector_db", vector_store_id) + await self.assert_action_allowed("delete", "vector_store", vector_store_id) provider = await self.get_provider_impl(vector_store_id) return await provider.openai_delete_vector_store_file( vector_store_id=vector_store_id, @@ -267,7 +236,7 @@ class VectorDBsRoutingTable(CommonRoutingTableImpl): attributes: dict[str, Any] | None = None, chunking_strategy: Any | None = None, ): - await self.assert_action_allowed("update", "vector_db", vector_store_id) + await self.assert_action_allowed("update", "vector_store", vector_store_id) provider = await self.get_provider_impl(vector_store_id) return await provider.openai_create_vector_store_file_batch( vector_store_id=vector_store_id, @@ -281,7 +250,7 @@ class VectorDBsRoutingTable(CommonRoutingTableImpl): batch_id: str, vector_store_id: str, ): - await self.assert_action_allowed("read", "vector_db", vector_store_id) + await self.assert_action_allowed("read", "vector_store", vector_store_id) provider = await self.get_provider_impl(vector_store_id) return await provider.openai_retrieve_vector_store_file_batch( batch_id=batch_id, @@ -298,7 +267,7 @@ class VectorDBsRoutingTable(CommonRoutingTableImpl): limit: int | None = 20, order: str | None = "desc", ): - await self.assert_action_allowed("read", "vector_db", vector_store_id) + await self.assert_action_allowed("read", "vector_store", vector_store_id) provider = await self.get_provider_impl(vector_store_id) return await 
provider.openai_list_files_in_vector_store_file_batch( batch_id=batch_id, @@ -315,7 +284,7 @@ batch_id: str, vector_store_id: str, ): - await self.assert_action_allowed("update", "vector_db", vector_store_id) + await self.assert_action_allowed("update", "vector_store", vector_store_id) provider = await self.get_provider_impl(vector_store_id) return await provider.openai_cancel_vector_store_file_batch( batch_id=batch_id, diff --git a/llama_stack/core/server/routes.py b/llama_stack/core/server/routes.py index 4970d0bf8..ed76ea86f 100644 --- a/llama_stack/core/server/routes.py +++ b/llama_stack/core/server/routes.py @@ -13,7 +13,6 @@ from aiohttp import hdrs from starlette.routing import Route from llama_stack.apis.datatypes import Api, ExternalApiSpec -from llama_stack.apis.tools import RAGToolRuntime, SpecialToolGroup from llama_stack.core.resolver import api_protocol_map from llama_stack.schema_utils import WebMethod @@ -25,33 +24,16 @@ RouteImpls = dict[str, PathImpl] RouteMatch = tuple[EndpointFunc, PathParams, str, WebMethod] -def toolgroup_protocol_map(): - return { - SpecialToolGroup.rag_tool: RAGToolRuntime, - } - - def get_all_api_routes( external_apis: dict[Api, ExternalApiSpec] | None = None, ) -> dict[Api, list[tuple[Route, WebMethod]]]: apis = {} protocols = api_protocol_map(external_apis) - toolgroup_protocols = toolgroup_protocol_map() for api, protocol in protocols.items(): routes = [] protocol_methods = inspect.getmembers(protocol, predicate=inspect.isfunction) - # HACK ALERT - if api == Api.tool_runtime: - for tool_group in SpecialToolGroup: - sub_protocol = toolgroup_protocols[tool_group] - sub_protocol_methods = inspect.getmembers(sub_protocol, predicate=inspect.isfunction) - for name, method in sub_protocol_methods: - if not hasattr(method, "__webmethod__"): - continue - protocol_methods.append((f"{tool_group.value}.{name}", method)) - for name, method in protocol_methods: # Get all webmethods for this method (supports multiple decorators) webmethods = getattr(method, "__webmethods__", []) diff --git a/llama_stack/core/stack.py b/llama_stack/core/stack.py index 4cf1d072d..49100b4bc 100644 --- a/llama_stack/core/stack.py +++ b/llama_stack/core/stack.py @@ -32,7 +32,7 @@ from llama_stack.apis.scoring_functions import ScoringFunctions from llama_stack.apis.shields import Shields from llama_stack.apis.synthetic_data_generation import SyntheticDataGeneration from llama_stack.apis.telemetry import Telemetry -from llama_stack.apis.tools import RAGToolRuntime, ToolGroups, ToolRuntime +from llama_stack.apis.tools import ToolGroups, ToolRuntime from llama_stack.apis.vector_io import VectorIO from llama_stack.core.conversations.conversations import ConversationServiceConfig, ConversationServiceImpl from llama_stack.core.datatypes import Provider, StackRunConfig, VectorStoresConfig @@ -80,7 +80,6 @@ class LlamaStack( Inspect, ToolGroups, ToolRuntime, - RAGToolRuntime, Files, Prompts, Conversations, diff --git a/llama_stack/core/ui/README.md b/llama_stack/core/ui/README.md index f1d85454b..37f1501c9 100644 --- a/llama_stack/core/ui/README.md +++ b/llama_stack/core/ui/README.md @@ -9,7 +9,7 @@ 1. Start up Llama Stack API server. More details [here](https://llamastack.github.io/latest/getting_started/index.html).
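
The README hunk just below, like the library_client.py message earlier, replaces `llama stack build` with a `list-deps` pipeline. A hypothetical scripted equivalent of `llama stack list-deps <distro> | xargs -L1 uv pip install`, for readers who prefer to see the pipeline spelled out:

```python
# Hypothetical helper equivalent to:
#   llama stack list-deps <distro> | xargs -L1 uv pip install
import subprocess

def install_distro_deps(distro: str) -> None:
    result = subprocess.run(
        ["llama", "stack", "list-deps", distro],
        capture_output=True, text=True, check=True,
    )
    for line in result.stdout.splitlines():
        if line.strip():
            # xargs -L1 runs one install per non-empty output line
            subprocess.run(["uv", "pip", "install", *line.split()], check=True)
```

The README hunk showing the same change follows.
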
``` -llama stack build --distro together --image-type venv +llama stack list-deps together | xargs -L1 uv pip install llama stack run together ``` diff --git a/llama_stack/core/ui/page/playground/tools.py b/llama_stack/core/ui/page/playground/tools.py index 4ee9d2204..16fd464ee 100644 --- a/llama_stack/core/ui/page/playground/tools.py +++ b/llama_stack/core/ui/page/playground/tools.py @@ -32,7 +32,7 @@ def tool_chat_page(): tool_groups_list = [tool_group.identifier for tool_group in tool_groups] mcp_tools_list = [tool for tool in tool_groups_list if tool.startswith("mcp::")] builtin_tools_list = [tool for tool in tool_groups_list if not tool.startswith("mcp::")] - selected_vector_dbs = [] + selected_vector_stores = [] def reset_agent(): st.session_state.clear() @@ -55,13 +55,13 @@ def tool_chat_page(): ) if "builtin::rag" in toolgroup_selection: - vector_dbs = llama_stack_api.client.vector_dbs.list() or [] - if not vector_dbs: + vector_stores = llama_stack_api.client.vector_stores.list() or [] + if not vector_stores: st.info("No vector databases available for selection.") - vector_dbs = [vector_db.identifier for vector_db in vector_dbs] - selected_vector_dbs = st.multiselect( + vector_stores = [vector_store.identifier for vector_store in vector_stores] + selected_vector_stores = st.multiselect( label="Select Document Collections to use in RAG queries", - options=vector_dbs, + options=vector_stores, on_change=reset_agent, ) @@ -119,7 +119,7 @@ def tool_chat_page(): tool_dict = dict( name="builtin::rag", args={ - "vector_db_ids": list(selected_vector_dbs), + "vector_store_ids": list(selected_vector_stores), }, ) toolgroup_selection[i] = tool_dict diff --git a/llama_stack/distributions/ci-tests/build.yaml b/llama_stack/distributions/ci-tests/build.yaml index c01e415a9..3cf43de15 100644 --- a/llama_stack/distributions/ci-tests/build.yaml +++ b/llama_stack/distributions/ci-tests/build.yaml @@ -48,7 +48,6 @@ distribution_spec: tool_runtime: - provider_type: remote::brave-search - provider_type: remote::tavily-search - - provider_type: inline::rag-runtime - provider_type: remote::model-context-protocol batches: - provider_type: inline::reference diff --git a/llama_stack/distributions/ci-tests/run.yaml b/llama_stack/distributions/ci-tests/run.yaml index ecf9eed3b..f403527fc 100644 --- a/llama_stack/distributions/ci-tests/run.yaml +++ b/llama_stack/distributions/ci-tests/run.yaml @@ -216,8 +216,6 @@ providers: config: api_key: ${env.TAVILY_SEARCH_API_KEY:=} max_results: 3 - - provider_id: rag-runtime - provider_type: inline::rag-runtime - provider_id: model-context-protocol provider_type: remote::model-context-protocol batches: @@ -263,8 +261,6 @@ registered_resources: tool_groups: - toolgroup_id: builtin::websearch provider_id: tavily-search - - toolgroup_id: builtin::rag - provider_id: rag-runtime server: port: 8321 telemetry: diff --git a/llama_stack/distributions/dell/build.yaml b/llama_stack/distributions/dell/build.yaml index 7bc26ca9e..0275a47a1 100644 --- a/llama_stack/distributions/dell/build.yaml +++ b/llama_stack/distributions/dell/build.yaml @@ -26,7 +26,6 @@ distribution_spec: tool_runtime: - provider_type: remote::brave-search - provider_type: remote::tavily-search - - provider_type: inline::rag-runtime image_type: venv additional_pip_packages: - aiosqlite diff --git a/llama_stack/distributions/dell/dell.py b/llama_stack/distributions/dell/dell.py index 88e72688f..708ba0b10 100644 --- a/llama_stack/distributions/dell/dell.py +++ b/llama_stack/distributions/dell/dell.py @@ -45,7 +45,6 
@@ def get_distribution_template() -> DistributionTemplate: "tool_runtime": [ BuildProvider(provider_type="remote::brave-search"), BuildProvider(provider_type="remote::tavily-search"), - BuildProvider(provider_type="inline::rag-runtime"), ], } name = "dell" @@ -98,10 +97,6 @@ def get_distribution_template() -> DistributionTemplate: toolgroup_id="builtin::websearch", provider_id="brave-search", ), - ToolGroupInput( - toolgroup_id="builtin::rag", - provider_id="rag-runtime", - ), ] return DistributionTemplate( diff --git a/llama_stack/distributions/dell/doc_template.md b/llama_stack/distributions/dell/doc_template.md index 852e78d0e..4e28673e8 100644 --- a/llama_stack/distributions/dell/doc_template.md +++ b/llama_stack/distributions/dell/doc_template.md @@ -157,7 +157,7 @@ docker run \ Make sure you have done `pip install llama-stack` and have the Llama Stack CLI available. ```bash -llama stack build --distro {{ name }} --image-type conda +llama stack list-deps {{ name }} | xargs -L1 pip install INFERENCE_MODEL=$INFERENCE_MODEL \ DEH_URL=$DEH_URL \ CHROMA_URL=$CHROMA_URL \ diff --git a/llama_stack/distributions/dell/run-with-safety.yaml b/llama_stack/distributions/dell/run-with-safety.yaml index 2563f2f4b..062c50e2b 100644 --- a/llama_stack/distributions/dell/run-with-safety.yaml +++ b/llama_stack/distributions/dell/run-with-safety.yaml @@ -87,8 +87,6 @@ providers: config: api_key: ${env.TAVILY_SEARCH_API_KEY:=} max_results: 3 - - provider_id: rag-runtime - provider_type: inline::rag-runtime storage: backends: kv_default: @@ -133,8 +131,6 @@ registered_resources: tool_groups: - toolgroup_id: builtin::websearch provider_id: brave-search - - toolgroup_id: builtin::rag - provider_id: rag-runtime server: port: 8321 telemetry: diff --git a/llama_stack/distributions/dell/run.yaml b/llama_stack/distributions/dell/run.yaml index 7bada394f..42e0658bd 100644 --- a/llama_stack/distributions/dell/run.yaml +++ b/llama_stack/distributions/dell/run.yaml @@ -83,8 +83,6 @@ providers: config: api_key: ${env.TAVILY_SEARCH_API_KEY:=} max_results: 3 - - provider_id: rag-runtime - provider_type: inline::rag-runtime storage: backends: kv_default: @@ -124,8 +122,6 @@ registered_resources: tool_groups: - toolgroup_id: builtin::websearch provider_id: brave-search - - toolgroup_id: builtin::rag - provider_id: rag-runtime server: port: 8321 telemetry: diff --git a/llama_stack/distributions/meta-reference-gpu/build.yaml b/llama_stack/distributions/meta-reference-gpu/build.yaml index 1513742a7..74da29bb8 100644 --- a/llama_stack/distributions/meta-reference-gpu/build.yaml +++ b/llama_stack/distributions/meta-reference-gpu/build.yaml @@ -24,7 +24,6 @@ distribution_spec: tool_runtime: - provider_type: remote::brave-search - provider_type: remote::tavily-search - - provider_type: inline::rag-runtime - provider_type: remote::model-context-protocol image_type: venv additional_pip_packages: diff --git a/llama_stack/distributions/meta-reference-gpu/meta_reference.py b/llama_stack/distributions/meta-reference-gpu/meta_reference.py index 4e4ddef33..aa66d43a0 100644 --- a/llama_stack/distributions/meta-reference-gpu/meta_reference.py +++ b/llama_stack/distributions/meta-reference-gpu/meta_reference.py @@ -47,7 +47,6 @@ def get_distribution_template() -> DistributionTemplate: "tool_runtime": [ BuildProvider(provider_type="remote::brave-search"), BuildProvider(provider_type="remote::tavily-search"), - BuildProvider(provider_type="inline::rag-runtime"), BuildProvider(provider_type="remote::model-context-protocol"), ], } @@ -92,10 
+91,6 @@ def get_distribution_template() -> DistributionTemplate: toolgroup_id="builtin::websearch", provider_id="tavily-search", ), - ToolGroupInput( - toolgroup_id="builtin::rag", - provider_id="rag-runtime", - ), ] return DistributionTemplate( diff --git a/llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml b/llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml index 01b5db4f9..6e74201db 100644 --- a/llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +++ b/llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml @@ -98,8 +98,6 @@ providers: config: api_key: ${env.TAVILY_SEARCH_API_KEY:=} max_results: 3 - - provider_id: rag-runtime - provider_type: inline::rag-runtime - provider_id: model-context-protocol provider_type: remote::model-context-protocol storage: @@ -146,8 +144,6 @@ registered_resources: tool_groups: - toolgroup_id: builtin::websearch provider_id: tavily-search - - toolgroup_id: builtin::rag - provider_id: rag-runtime server: port: 8321 telemetry: diff --git a/llama_stack/distributions/meta-reference-gpu/run.yaml b/llama_stack/distributions/meta-reference-gpu/run.yaml index 87c33dde0..92934ca74 100644 --- a/llama_stack/distributions/meta-reference-gpu/run.yaml +++ b/llama_stack/distributions/meta-reference-gpu/run.yaml @@ -88,8 +88,6 @@ providers: config: api_key: ${env.TAVILY_SEARCH_API_KEY:=} max_results: 3 - - provider_id: rag-runtime - provider_type: inline::rag-runtime - provider_id: model-context-protocol provider_type: remote::model-context-protocol storage: @@ -131,8 +129,6 @@ registered_resources: tool_groups: - toolgroup_id: builtin::websearch provider_id: tavily-search - - toolgroup_id: builtin::rag - provider_id: rag-runtime server: port: 8321 telemetry: diff --git a/llama_stack/distributions/nvidia/build.yaml b/llama_stack/distributions/nvidia/build.yaml index 8ddd12439..3412ea15b 100644 --- a/llama_stack/distributions/nvidia/build.yaml +++ b/llama_stack/distributions/nvidia/build.yaml @@ -19,8 +19,7 @@ distribution_spec: - provider_type: remote::nvidia scoring: - provider_type: inline::basic - tool_runtime: - - provider_type: inline::rag-runtime + tool_runtime: [] files: - provider_type: inline::localfs image_type: venv diff --git a/llama_stack/distributions/nvidia/nvidia.py b/llama_stack/distributions/nvidia/nvidia.py index a92a2e6f8..889f83aa5 100644 --- a/llama_stack/distributions/nvidia/nvidia.py +++ b/llama_stack/distributions/nvidia/nvidia.py @@ -28,7 +28,7 @@ def get_distribution_template(name: str = "nvidia") -> DistributionTemplate: BuildProvider(provider_type="remote::nvidia"), ], "scoring": [BuildProvider(provider_type="inline::basic")], - "tool_runtime": [BuildProvider(provider_type="inline::rag-runtime")], + "tool_runtime": [], "files": [BuildProvider(provider_type="inline::localfs")], } @@ -66,12 +66,7 @@ def get_distribution_template(name: str = "nvidia") -> DistributionTemplate: provider_id="nvidia", ) - default_tool_groups = [ - ToolGroupInput( - toolgroup_id="builtin::rag", - provider_id="rag-runtime", - ), - ] + default_tool_groups: list[ToolGroupInput] = [] return DistributionTemplate( name=name, diff --git a/llama_stack/distributions/nvidia/run-with-safety.yaml b/llama_stack/distributions/nvidia/run-with-safety.yaml index c23d0f9cb..dca29ed2a 100644 --- a/llama_stack/distributions/nvidia/run-with-safety.yaml +++ b/llama_stack/distributions/nvidia/run-with-safety.yaml @@ -80,9 +80,7 @@ providers: scoring: - provider_id: basic provider_type: inline::basic - tool_runtime: - - provider_id: 
rag-runtime - provider_type: inline::rag-runtime + tool_runtime: [] files: - provider_id: meta-reference-files provider_type: inline::localfs @@ -128,9 +126,7 @@ registered_resources: datasets: [] scoring_fns: [] benchmarks: [] - tool_groups: - - toolgroup_id: builtin::rag - provider_id: rag-runtime + tool_groups: [] server: port: 8321 telemetry: diff --git a/llama_stack/distributions/nvidia/run.yaml b/llama_stack/distributions/nvidia/run.yaml index 81e744d53..e35d9c44c 100644 --- a/llama_stack/distributions/nvidia/run.yaml +++ b/llama_stack/distributions/nvidia/run.yaml @@ -69,9 +69,7 @@ providers: scoring: - provider_id: basic provider_type: inline::basic - tool_runtime: - - provider_id: rag-runtime - provider_type: inline::rag-runtime + tool_runtime: [] files: - provider_id: meta-reference-files provider_type: inline::localfs @@ -107,9 +105,7 @@ registered_resources: datasets: [] scoring_fns: [] benchmarks: [] - tool_groups: - - toolgroup_id: builtin::rag - provider_id: rag-runtime + tool_groups: [] server: port: 8321 telemetry: diff --git a/llama_stack/distributions/open-benchmark/build.yaml b/llama_stack/distributions/open-benchmark/build.yaml index 05acd98e3..9fc0e9eb0 100644 --- a/llama_stack/distributions/open-benchmark/build.yaml +++ b/llama_stack/distributions/open-benchmark/build.yaml @@ -28,7 +28,6 @@ distribution_spec: tool_runtime: - provider_type: remote::brave-search - provider_type: remote::tavily-search - - provider_type: inline::rag-runtime - provider_type: remote::model-context-protocol image_type: venv additional_pip_packages: diff --git a/llama_stack/distributions/open-benchmark/open_benchmark.py b/llama_stack/distributions/open-benchmark/open_benchmark.py index 2b7760894..cceec74fd 100644 --- a/llama_stack/distributions/open-benchmark/open_benchmark.py +++ b/llama_stack/distributions/open-benchmark/open_benchmark.py @@ -118,7 +118,6 @@ def get_distribution_template() -> DistributionTemplate: "tool_runtime": [ BuildProvider(provider_type="remote::brave-search"), BuildProvider(provider_type="remote::tavily-search"), - BuildProvider(provider_type="inline::rag-runtime"), BuildProvider(provider_type="remote::model-context-protocol"), ], } @@ -154,10 +153,6 @@ def get_distribution_template() -> DistributionTemplate: toolgroup_id="builtin::websearch", provider_id="tavily-search", ), - ToolGroupInput( - toolgroup_id="builtin::rag", - provider_id="rag-runtime", - ), ] models, _ = get_model_registry(available_models) diff --git a/llama_stack/distributions/open-benchmark/run.yaml b/llama_stack/distributions/open-benchmark/run.yaml index 4fd0e199b..8f63e4417 100644 --- a/llama_stack/distributions/open-benchmark/run.yaml +++ b/llama_stack/distributions/open-benchmark/run.yaml @@ -118,8 +118,6 @@ providers: config: api_key: ${env.TAVILY_SEARCH_API_KEY:=} max_results: 3 - - provider_id: rag-runtime - provider_type: inline::rag-runtime - provider_id: model-context-protocol provider_type: remote::model-context-protocol storage: @@ -244,8 +242,6 @@ registered_resources: tool_groups: - toolgroup_id: builtin::websearch provider_id: tavily-search - - toolgroup_id: builtin::rag - provider_id: rag-runtime server: port: 8321 telemetry: diff --git a/llama_stack/distributions/postgres-demo/build.yaml b/llama_stack/distributions/postgres-demo/build.yaml index 063dc3999..99b4edeb3 100644 --- a/llama_stack/distributions/postgres-demo/build.yaml +++ b/llama_stack/distributions/postgres-demo/build.yaml @@ -14,7 +14,6 @@ distribution_spec: tool_runtime: - provider_type: remote::brave-search - 
provider_type: remote::tavily-search - - provider_type: inline::rag-runtime - provider_type: remote::model-context-protocol image_type: venv additional_pip_packages: diff --git a/llama_stack/distributions/postgres-demo/postgres_demo.py b/llama_stack/distributions/postgres-demo/postgres_demo.py index 876370ef3..9f8d35cb1 100644 --- a/llama_stack/distributions/postgres-demo/postgres_demo.py +++ b/llama_stack/distributions/postgres-demo/postgres_demo.py @@ -45,7 +45,6 @@ def get_distribution_template() -> DistributionTemplate: "tool_runtime": [ BuildProvider(provider_type="remote::brave-search"), BuildProvider(provider_type="remote::tavily-search"), - BuildProvider(provider_type="inline::rag-runtime"), BuildProvider(provider_type="remote::model-context-protocol"), ], } @@ -66,10 +65,6 @@ def get_distribution_template() -> DistributionTemplate: toolgroup_id="builtin::websearch", provider_id="tavily-search", ), - ToolGroupInput( - toolgroup_id="builtin::rag", - provider_id="rag-runtime", - ), ] default_models = [ diff --git a/llama_stack/distributions/postgres-demo/run.yaml b/llama_stack/distributions/postgres-demo/run.yaml index 0d7ecff48..67222969c 100644 --- a/llama_stack/distributions/postgres-demo/run.yaml +++ b/llama_stack/distributions/postgres-demo/run.yaml @@ -54,8 +54,6 @@ providers: config: api_key: ${env.TAVILY_SEARCH_API_KEY:=} max_results: 3 - - provider_id: rag-runtime - provider_type: inline::rag-runtime - provider_id: model-context-protocol provider_type: remote::model-context-protocol storage: @@ -107,8 +105,6 @@ registered_resources: tool_groups: - toolgroup_id: builtin::websearch provider_id: tavily-search - - toolgroup_id: builtin::rag - provider_id: rag-runtime server: port: 8321 telemetry: diff --git a/llama_stack/distributions/starter-gpu/build.yaml b/llama_stack/distributions/starter-gpu/build.yaml index b2e2a0c85..678d7995d 100644 --- a/llama_stack/distributions/starter-gpu/build.yaml +++ b/llama_stack/distributions/starter-gpu/build.yaml @@ -49,7 +49,6 @@ distribution_spec: tool_runtime: - provider_type: remote::brave-search - provider_type: remote::tavily-search - - provider_type: inline::rag-runtime - provider_type: remote::model-context-protocol batches: - provider_type: inline::reference diff --git a/llama_stack/distributions/starter-gpu/run.yaml b/llama_stack/distributions/starter-gpu/run.yaml index 92483c78e..4764dc02c 100644 --- a/llama_stack/distributions/starter-gpu/run.yaml +++ b/llama_stack/distributions/starter-gpu/run.yaml @@ -219,8 +219,6 @@ providers: config: api_key: ${env.TAVILY_SEARCH_API_KEY:=} max_results: 3 - - provider_id: rag-runtime - provider_type: inline::rag-runtime - provider_id: model-context-protocol provider_type: remote::model-context-protocol batches: @@ -266,8 +264,6 @@ registered_resources: tool_groups: - toolgroup_id: builtin::websearch provider_id: tavily-search - - toolgroup_id: builtin::rag - provider_id: rag-runtime server: port: 8321 telemetry: diff --git a/llama_stack/distributions/starter/build.yaml b/llama_stack/distributions/starter/build.yaml index baa80ef3e..e6cd3c688 100644 --- a/llama_stack/distributions/starter/build.yaml +++ b/llama_stack/distributions/starter/build.yaml @@ -49,7 +49,6 @@ distribution_spec: tool_runtime: - provider_type: remote::brave-search - provider_type: remote::tavily-search - - provider_type: inline::rag-runtime - provider_type: remote::model-context-protocol batches: - provider_type: inline::reference diff --git a/llama_stack/distributions/starter/run.yaml 
b/llama_stack/distributions/starter/run.yaml index 3b9d8f890..88358501e 100644 --- a/llama_stack/distributions/starter/run.yaml +++ b/llama_stack/distributions/starter/run.yaml @@ -216,8 +216,6 @@ providers: config: api_key: ${env.TAVILY_SEARCH_API_KEY:=} max_results: 3 - - provider_id: rag-runtime - provider_type: inline::rag-runtime - provider_id: model-context-protocol provider_type: remote::model-context-protocol batches: @@ -263,8 +261,6 @@ registered_resources: tool_groups: - toolgroup_id: builtin::websearch provider_id: tavily-search - - toolgroup_id: builtin::rag - provider_id: rag-runtime server: port: 8321 telemetry: diff --git a/llama_stack/distributions/starter/starter.py b/llama_stack/distributions/starter/starter.py index c8c7101a6..bad6279bd 100644 --- a/llama_stack/distributions/starter/starter.py +++ b/llama_stack/distributions/starter/starter.py @@ -140,7 +140,6 @@ def get_distribution_template(name: str = "starter") -> DistributionTemplate: "tool_runtime": [ BuildProvider(provider_type="remote::brave-search"), BuildProvider(provider_type="remote::tavily-search"), - BuildProvider(provider_type="inline::rag-runtime"), BuildProvider(provider_type="remote::model-context-protocol"), ], "batches": [ @@ -162,10 +161,6 @@ def get_distribution_template(name: str = "starter") -> DistributionTemplate: toolgroup_id="builtin::websearch", provider_id="tavily-search", ), - ToolGroupInput( - toolgroup_id="builtin::rag", - provider_id="rag-runtime", - ), ] default_shields = [ # if the diff --git a/llama_stack/distributions/watsonx/build.yaml b/llama_stack/distributions/watsonx/build.yaml index dba1a94e2..d2c396085 100644 --- a/llama_stack/distributions/watsonx/build.yaml +++ b/llama_stack/distributions/watsonx/build.yaml @@ -23,7 +23,6 @@ distribution_spec: tool_runtime: - provider_type: remote::brave-search - provider_type: remote::tavily-search - - provider_type: inline::rag-runtime - provider_type: remote::model-context-protocol files: - provider_type: inline::localfs diff --git a/llama_stack/distributions/watsonx/run.yaml b/llama_stack/distributions/watsonx/run.yaml index ca3c8402d..ddc7e095f 100644 --- a/llama_stack/distributions/watsonx/run.yaml +++ b/llama_stack/distributions/watsonx/run.yaml @@ -83,8 +83,6 @@ providers: config: api_key: ${env.TAVILY_SEARCH_API_KEY:=} max_results: 3 - - provider_id: rag-runtime - provider_type: inline::rag-runtime - provider_id: model-context-protocol provider_type: remote::model-context-protocol files: @@ -125,8 +123,6 @@ registered_resources: tool_groups: - toolgroup_id: builtin::websearch provider_id: tavily-search - - toolgroup_id: builtin::rag - provider_id: rag-runtime server: port: 8321 telemetry: diff --git a/llama_stack/distributions/watsonx/watsonx.py b/llama_stack/distributions/watsonx/watsonx.py index d79aea872..b16f76fcb 100644 --- a/llama_stack/distributions/watsonx/watsonx.py +++ b/llama_stack/distributions/watsonx/watsonx.py @@ -33,7 +33,6 @@ def get_distribution_template(name: str = "watsonx") -> DistributionTemplate: "tool_runtime": [ BuildProvider(provider_type="remote::brave-search"), BuildProvider(provider_type="remote::tavily-search"), - BuildProvider(provider_type="inline::rag-runtime"), BuildProvider(provider_type="remote::model-context-protocol"), ], "files": [BuildProvider(provider_type="inline::localfs")], @@ -50,10 +49,6 @@ def get_distribution_template(name: str = "watsonx") -> DistributionTemplate: toolgroup_id="builtin::websearch", provider_id="tavily-search", ), - ToolGroupInput( - toolgroup_id="builtin::rag", - 
provider_id="rag-runtime", - ), ] files_provider = Provider( diff --git a/llama_stack/providers/datatypes.py b/llama_stack/providers/datatypes.py index c8ff9cecb..9be3edb8e 100644 --- a/llama_stack/providers/datatypes.py +++ b/llama_stack/providers/datatypes.py @@ -17,7 +17,7 @@ from llama_stack.apis.models import Model from llama_stack.apis.scoring_functions import ScoringFn from llama_stack.apis.shields import Shield from llama_stack.apis.tools import ToolGroup -from llama_stack.apis.vector_dbs import VectorDB +from llama_stack.apis.vector_stores import VectorStore from llama_stack.schema_utils import json_schema_type @@ -68,10 +68,10 @@ class ShieldsProtocolPrivate(Protocol): async def unregister_shield(self, identifier: str) -> None: ... -class VectorDBsProtocolPrivate(Protocol): - async def register_vector_db(self, vector_db: VectorDB) -> None: ... +class VectorStoresProtocolPrivate(Protocol): + async def register_vector_store(self, vector_store: VectorStore) -> None: ... - async def unregister_vector_db(self, vector_db_id: str) -> None: ... + async def unregister_vector_store(self, vector_store_id: str) -> None: ... class DatasetsProtocolPrivate(Protocol): diff --git a/llama_stack/providers/inline/telemetry/meta_reference/console_span_processor.py b/llama_stack/providers/inline/telemetry/meta_reference/console_span_processor.py deleted file mode 100644 index 78e49af94..000000000 --- a/llama_stack/providers/inline/telemetry/meta_reference/console_span_processor.py +++ /dev/null @@ -1,75 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -import json -from datetime import UTC, datetime - -from opentelemetry.sdk.trace import ReadableSpan -from opentelemetry.sdk.trace.export import SpanProcessor -from opentelemetry.trace.status import StatusCode - -from llama_stack.log import get_logger - -logger = get_logger(name="console_span_processor", category="telemetry") - - -class ConsoleSpanProcessor(SpanProcessor): - def __init__(self, print_attributes: bool = False): - self.print_attributes = print_attributes - - def on_start(self, span: ReadableSpan, parent_context=None) -> None: - if span.attributes and span.attributes.get("__autotraced__"): - return - - timestamp = datetime.fromtimestamp(span.start_time / 1e9, tz=UTC).strftime("%H:%M:%S.%f")[:-3] - logger.info(f"[dim]{timestamp}[/dim] [bold magenta][START][/bold magenta] [dim]{span.name}[/dim]") - - def on_end(self, span: ReadableSpan) -> None: - timestamp = datetime.fromtimestamp(span.end_time / 1e9, tz=UTC).strftime("%H:%M:%S.%f")[:-3] - span_context = f"[dim]{timestamp}[/dim] [bold magenta][END][/bold magenta] [dim]{span.name}[/dim]" - if span.status.status_code == StatusCode.ERROR: - span_context += " [bold red][ERROR][/bold red]" - elif span.status.status_code != StatusCode.UNSET: - span_context += f" [{span.status.status_code}]" - duration_ms = (span.end_time - span.start_time) / 1e6 - span_context += f" ({duration_ms:.2f}ms)" - logger.info(span_context) - - if self.print_attributes and span.attributes: - for key, value in span.attributes.items(): - if key.startswith("__"): - continue - str_value = str(value) - if len(str_value) > 1000: - str_value = str_value[:997] + "..." 
- logger.info(f" [dim]{key}[/dim]: {str_value}") - - for event in span.events: - event_time = datetime.fromtimestamp(event.timestamp / 1e9, tz=UTC).strftime("%H:%M:%S.%f")[:-3] - severity = event.attributes.get("severity", "info") - message = event.attributes.get("message", event.name) - if isinstance(message, dict) or isinstance(message, list): - message = json.dumps(message, indent=2) - severity_color = { - "error": "red", - "warn": "yellow", - "info": "white", - "debug": "dim", - }.get(severity, "white") - logger.info(f" {event_time} [bold {severity_color}][{severity.upper()}][/bold {severity_color}] {message}") - if event.attributes: - for key, value in event.attributes.items(): - if key.startswith("__") or key in ["message", "severity"]: - continue - logger.info(f"[dim]{key}[/dim]: {value}") - - def shutdown(self) -> None: - """Shutdown the processor.""" - pass - - def force_flush(self, timeout_millis: float | None = None) -> bool: - """Force flush any pending spans.""" - return True diff --git a/llama_stack/providers/inline/telemetry/meta_reference/telemetry.py b/llama_stack/providers/inline/telemetry/meta_reference/telemetry.py index 2a225476b..b15b1e490 100644 --- a/llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +++ b/llama_stack/providers/inline/telemetry/meta_reference/telemetry.py @@ -79,8 +79,10 @@ class TelemetryAdapter(Telemetry): metric_reader = PeriodicExportingMetricReader(OTLPMetricExporter()) metric_provider = MeterProvider(metric_readers=[metric_reader]) metrics.set_meter_provider(metric_provider) + self.is_otel_endpoint_set = True else: logger.warning("OTEL_EXPORTER_OTLP_ENDPOINT is not set, skipping telemetry") + self.is_otel_endpoint_set = False self.meter = metrics.get_meter(__name__) self._lock = _global_lock @@ -89,7 +91,8 @@ class TelemetryAdapter(Telemetry): pass async def shutdown(self) -> None: - trace.get_tracer_provider().force_flush() + if self.is_otel_endpoint_set: + trace.get_tracer_provider().force_flush() async def log_event(self, event: Event, ttl_seconds: int = 604800) -> None: if isinstance(event, UnstructuredLogEvent): diff --git a/llama_stack/providers/inline/tool_runtime/__init__.py b/llama_stack/providers/inline/tool_runtime/__init__.py deleted file mode 100644 index 756f351d8..000000000 --- a/llama_stack/providers/inline/tool_runtime/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. diff --git a/llama_stack/providers/inline/tool_runtime/rag/__init__.py b/llama_stack/providers/inline/tool_runtime/rag/__init__.py deleted file mode 100644 index f9a7e7b89..000000000 --- a/llama_stack/providers/inline/tool_runtime/rag/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
- -from typing import Any - -from llama_stack.providers.datatypes import Api - -from .config import RagToolRuntimeConfig - - -async def get_provider_impl(config: RagToolRuntimeConfig, deps: dict[Api, Any]): - from .memory import MemoryToolRuntimeImpl - - impl = MemoryToolRuntimeImpl(config, deps[Api.vector_io], deps[Api.inference], deps[Api.files]) - await impl.initialize() - return impl diff --git a/llama_stack/providers/inline/tool_runtime/rag/config.py b/llama_stack/providers/inline/tool_runtime/rag/config.py deleted file mode 100644 index 43ba78e65..000000000 --- a/llama_stack/providers/inline/tool_runtime/rag/config.py +++ /dev/null @@ -1,15 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from typing import Any - -from pydantic import BaseModel - - -class RagToolRuntimeConfig(BaseModel): - @classmethod - def sample_run_config(cls, __distro_dir__: str, **kwargs: Any) -> dict[str, Any]: - return {} diff --git a/llama_stack/providers/inline/tool_runtime/rag/context_retriever.py b/llama_stack/providers/inline/tool_runtime/rag/context_retriever.py deleted file mode 100644 index 14cbec49d..000000000 --- a/llama_stack/providers/inline/tool_runtime/rag/context_retriever.py +++ /dev/null @@ -1,77 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - - -from jinja2 import Template - -from llama_stack.apis.common.content_types import InterleavedContent -from llama_stack.apis.inference import OpenAIChatCompletionRequestWithExtraBody, OpenAIUserMessageParam -from llama_stack.apis.tools.rag_tool import ( - DefaultRAGQueryGeneratorConfig, - LLMRAGQueryGeneratorConfig, - RAGQueryGenerator, - RAGQueryGeneratorConfig, -) -from llama_stack.providers.utils.inference.prompt_adapter import ( - interleaved_content_as_str, -) - - -async def generate_rag_query( - config: RAGQueryGeneratorConfig, - content: InterleavedContent, - **kwargs, -): - """ - Generates a query that will be used for - retrieving relevant information from the memory bank. 
- """ - if config.type == RAGQueryGenerator.default.value: - query = await default_rag_query_generator(config, content, **kwargs) - elif config.type == RAGQueryGenerator.llm.value: - query = await llm_rag_query_generator(config, content, **kwargs) - else: - raise NotImplementedError(f"Unsupported memory query generator {config.type}") - return query - - -async def default_rag_query_generator( - config: DefaultRAGQueryGeneratorConfig, - content: InterleavedContent, - **kwargs, -): - return interleaved_content_as_str(content, sep=config.separator) - - -async def llm_rag_query_generator( - config: LLMRAGQueryGeneratorConfig, - content: InterleavedContent, - **kwargs, -): - assert "inference_api" in kwargs, "LLMRAGQueryGenerator needs inference_api" - inference_api = kwargs["inference_api"] - - messages = [] - if isinstance(content, list): - messages = [interleaved_content_as_str(m) for m in content] - else: - messages = [interleaved_content_as_str(content)] - - template = Template(config.template) - rendered_content: str = template.render({"messages": messages}) - - model = config.model - message = OpenAIUserMessageParam(content=rendered_content) - params = OpenAIChatCompletionRequestWithExtraBody( - model=model, - messages=[message], - stream=False, - ) - response = await inference_api.openai_chat_completion(params) - - query = response.choices[0].message.content - - return query diff --git a/llama_stack/providers/inline/tool_runtime/rag/memory.py b/llama_stack/providers/inline/tool_runtime/rag/memory.py deleted file mode 100644 index dc3dfbbca..000000000 --- a/llama_stack/providers/inline/tool_runtime/rag/memory.py +++ /dev/null @@ -1,332 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
- -import asyncio -import base64 -import io -import mimetypes -from typing import Any - -import httpx -from fastapi import UploadFile -from pydantic import TypeAdapter - -from llama_stack.apis.common.content_types import ( - URL, - InterleavedContent, - InterleavedContentItem, - TextContentItem, -) -from llama_stack.apis.files import Files, OpenAIFilePurpose -from llama_stack.apis.inference import Inference -from llama_stack.apis.tools import ( - ListToolDefsResponse, - RAGDocument, - RAGQueryConfig, - RAGQueryResult, - RAGToolRuntime, - ToolDef, - ToolGroup, - ToolInvocationResult, - ToolRuntime, -) -from llama_stack.apis.vector_io import ( - QueryChunksResponse, - VectorIO, - VectorStoreChunkingStrategyStatic, - VectorStoreChunkingStrategyStaticConfig, -) -from llama_stack.log import get_logger -from llama_stack.providers.datatypes import ToolGroupsProtocolPrivate -from llama_stack.providers.utils.inference.prompt_adapter import interleaved_content_as_str -from llama_stack.providers.utils.memory.vector_store import parse_data_url - -from .config import RagToolRuntimeConfig -from .context_retriever import generate_rag_query - -log = get_logger(name=__name__, category="tool_runtime") - - -async def raw_data_from_doc(doc: RAGDocument) -> tuple[bytes, str]: - """Get raw binary data and mime type from a RAGDocument for file upload.""" - if isinstance(doc.content, URL): - if doc.content.uri.startswith("data:"): - parts = parse_data_url(doc.content.uri) - mime_type = parts["mimetype"] - data = parts["data"] - - if parts["is_base64"]: - file_data = base64.b64decode(data) - else: - file_data = data.encode("utf-8") - - return file_data, mime_type - else: - async with httpx.AsyncClient() as client: - r = await client.get(doc.content.uri) - r.raise_for_status() - mime_type = r.headers.get("content-type", "application/octet-stream") - return r.content, mime_type - else: - if isinstance(doc.content, str): - content_str = doc.content - else: - content_str = interleaved_content_as_str(doc.content) - - if content_str.startswith("data:"): - parts = parse_data_url(content_str) - mime_type = parts["mimetype"] - data = parts["data"] - - if parts["is_base64"]: - file_data = base64.b64decode(data) - else: - file_data = data.encode("utf-8") - - return file_data, mime_type - else: - return content_str.encode("utf-8"), "text/plain" - - -class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, RAGToolRuntime): - def __init__( - self, - config: RagToolRuntimeConfig, - vector_io_api: VectorIO, - inference_api: Inference, - files_api: Files, - ): - self.config = config - self.vector_io_api = vector_io_api - self.inference_api = inference_api - self.files_api = files_api - - async def initialize(self): - pass - - async def shutdown(self): - pass - - async def register_toolgroup(self, toolgroup: ToolGroup) -> None: - pass - - async def unregister_toolgroup(self, toolgroup_id: str) -> None: - return - - async def insert( - self, - documents: list[RAGDocument], - vector_db_id: str, - chunk_size_in_tokens: int = 512, - ) -> None: - if not documents: - return - - for doc in documents: - try: - try: - file_data, mime_type = await raw_data_from_doc(doc) - except Exception as e: - log.error(f"Failed to extract content from document {doc.document_id}: {e}") - continue - - file_extension = mimetypes.guess_extension(mime_type) or ".txt" - filename = doc.metadata.get("filename", f"{doc.document_id}{file_extension}") - - file_obj = io.BytesIO(file_data) - file_obj.name = filename - - upload_file = 
UploadFile(file=file_obj, filename=filename) - - try: - created_file = await self.files_api.openai_upload_file( - file=upload_file, purpose=OpenAIFilePurpose.ASSISTANTS - ) - except Exception as e: - log.error(f"Failed to upload file for document {doc.document_id}: {e}") - continue - - chunking_strategy = VectorStoreChunkingStrategyStatic( - static=VectorStoreChunkingStrategyStaticConfig( - max_chunk_size_tokens=chunk_size_in_tokens, - chunk_overlap_tokens=chunk_size_in_tokens // 4, - ) - ) - - try: - await self.vector_io_api.openai_attach_file_to_vector_store( - vector_store_id=vector_db_id, - file_id=created_file.id, - attributes=doc.metadata, - chunking_strategy=chunking_strategy, - ) - except Exception as e: - log.error( - f"Failed to attach file {created_file.id} to vector store {vector_db_id} for document {doc.document_id}: {e}" - ) - continue - - except Exception as e: - log.error(f"Unexpected error processing document {doc.document_id}: {e}") - continue - - async def query( - self, - content: InterleavedContent, - vector_db_ids: list[str], - query_config: RAGQueryConfig | None = None, - ) -> RAGQueryResult: - if not vector_db_ids: - raise ValueError( - "No vector DBs were provided to the knowledge search tool. Please provide at least one vector DB ID." - ) - - query_config = query_config or RAGQueryConfig() - query = await generate_rag_query( - query_config.query_generator_config, - content, - inference_api=self.inference_api, - ) - tasks = [ - self.vector_io_api.query_chunks( - vector_db_id=vector_db_id, - query=query, - params={ - "mode": query_config.mode, - "max_chunks": query_config.max_chunks, - "score_threshold": 0.0, - "ranker": query_config.ranker, - }, - ) - for vector_db_id in vector_db_ids - ] - results: list[QueryChunksResponse] = await asyncio.gather(*tasks) - - chunks = [] - scores = [] - - for vector_db_id, result in zip(vector_db_ids, results, strict=False): - for chunk, score in zip(result.chunks, result.scores, strict=False): - if not hasattr(chunk, "metadata") or chunk.metadata is None: - chunk.metadata = {} - chunk.metadata["vector_db_id"] = vector_db_id - - chunks.append(chunk) - scores.append(score) - - if not chunks: - return RAGQueryResult(content=None) - - # sort by score - chunks, scores = zip(*sorted(zip(chunks, scores, strict=False), key=lambda x: x[1], reverse=True), strict=False) # type: ignore - chunks = chunks[: query_config.max_chunks] - - tokens = 0 - picked: list[InterleavedContentItem] = [ - TextContentItem( - text=f"knowledge_search tool found {len(chunks)} chunks:\nBEGIN of knowledge_search tool results.\n" - ) - ] - for i, chunk in enumerate(chunks): - metadata = chunk.metadata - tokens += metadata.get("token_count", 0) - tokens += metadata.get("metadata_token_count", 0) - - if tokens > query_config.max_tokens_in_context: - log.error( - f"Using {len(picked)} chunks; reached max tokens in context: {tokens}", - ) - break - - # Add useful keys from chunk_metadata to metadata and remove some from metadata - chunk_metadata_keys_to_include_from_context = [ - "chunk_id", - "document_id", - "source", - ] - metadata_keys_to_exclude_from_context = [ - "token_count", - "metadata_token_count", - "vector_db_id", - ] - metadata_for_context = {} - for k in chunk_metadata_keys_to_include_from_context: - metadata_for_context[k] = getattr(chunk.chunk_metadata, k) - for k in metadata: - if k not in metadata_keys_to_exclude_from_context: - metadata_for_context[k] = metadata[k] - - text_content = query_config.chunk_template.format(index=i + 1, chunk=chunk, 
metadata=metadata_for_context) - picked.append(TextContentItem(text=text_content)) - - picked.append(TextContentItem(text="END of knowledge_search tool results.\n")) - picked.append( - TextContentItem( - text=f'The above results were retrieved to help answer the user\'s query: "{interleaved_content_as_str(content)}". Use them as supporting information only in answering this query.\n', - ) - ) - - return RAGQueryResult( - content=picked, - metadata={ - "document_ids": [c.document_id for c in chunks[: len(picked)]], - "chunks": [c.content for c in chunks[: len(picked)]], - "scores": scores[: len(picked)], - "vector_db_ids": [c.metadata["vector_db_id"] for c in chunks[: len(picked)]], - }, - ) - - async def list_runtime_tools( - self, tool_group_id: str | None = None, mcp_endpoint: URL | None = None - ) -> ListToolDefsResponse: - # Parameters are not listed since these methods are not yet invoked automatically - # by the LLM. The method is only implemented so things like /tools can list without - # encountering fatals. - return ListToolDefsResponse( - data=[ - ToolDef( - name="insert_into_memory", - description="Insert documents into memory", - ), - ToolDef( - name="knowledge_search", - description="Search for information in a database.", - input_schema={ - "type": "object", - "properties": { - "query": { - "type": "string", - "description": "The query to search for. Can be a natural language sentence or keywords.", - } - }, - "required": ["query"], - }, - ), - ] - ) - - async def invoke_tool(self, tool_name: str, kwargs: dict[str, Any]) -> ToolInvocationResult: - vector_db_ids = kwargs.get("vector_db_ids", []) - query_config = kwargs.get("query_config") - if query_config: - query_config = TypeAdapter(RAGQueryConfig).validate_python(query_config) - else: - query_config = RAGQueryConfig() - - query = kwargs["query"] - result = await self.query( - content=query, - vector_db_ids=vector_db_ids, - query_config=query_config, - ) - - return ToolInvocationResult( - content=result.content or [], - metadata={ - **(result.metadata or {}), - "citation_files": getattr(result, "citation_files", None), - }, - ) diff --git a/llama_stack/providers/inline/vector_io/faiss/faiss.py b/llama_stack/providers/inline/vector_io/faiss/faiss.py index f13eb3e96..5e33d4ca3 100644 --- a/llama_stack/providers/inline/vector_io/faiss/faiss.py +++ b/llama_stack/providers/inline/vector_io/faiss/faiss.py @@ -17,21 +17,21 @@ from numpy.typing import NDArray from llama_stack.apis.common.errors import VectorStoreNotFoundError from llama_stack.apis.files import Files from llama_stack.apis.inference import Inference, InterleavedContent -from llama_stack.apis.vector_dbs import VectorDB from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO +from llama_stack.apis.vector_stores import VectorStore from llama_stack.log import get_logger -from llama_stack.providers.datatypes import HealthResponse, HealthStatus, VectorDBsProtocolPrivate +from llama_stack.providers.datatypes import HealthResponse, HealthStatus, VectorStoresProtocolPrivate from llama_stack.providers.utils.kvstore import kvstore_impl from llama_stack.providers.utils.kvstore.api import KVStore from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin -from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorDBWithIndex +from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorStoreWithIndex from .config import FaissVectorIOConfig 
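With `MemoryToolRuntimeImpl` deleted, the ingestion path it wrapped (upload through the Files API, then attach to a vector store with a static chunking strategy) is exercised directly against the OpenAI-compatible endpoints. A rough client-side sketch, assuming a Llama Stack server on localhost:8321 and a recent `openai` client; the URL, filename, and token sizes are illustrative:

```python
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8321/v1", api_key="none")

# Upload the document, then attach it to a vector store; this mirrors the
# files_api.openai_upload_file and openai_attach_file_to_vector_store calls
# in the deleted insert() above, including the overlap of one quarter of the
# chunk size (512 -> 128).
vs = client.vector_stores.create(name="docs")
uploaded = client.files.create(file=open("notes.txt", "rb"), purpose="assistants")
client.vector_stores.files.create(
    vector_store_id=vs.id,
    file_id=uploaded.id,
    chunking_strategy={
        "type": "static",
        "static": {"max_chunk_size_tokens": 512, "chunk_overlap_tokens": 128},
    },
)
```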
logger = get_logger(name=__name__, category="vector_io") VERSION = "v3" -VECTOR_DBS_PREFIX = f"vector_dbs:{VERSION}::" +VECTOR_DBS_PREFIX = f"vector_stores:{VERSION}::" FAISS_INDEX_PREFIX = f"faiss_index:{VERSION}::" OPENAI_VECTOR_STORES_PREFIX = f"openai_vector_stores:{VERSION}::" OPENAI_VECTOR_STORES_FILES_PREFIX = f"openai_vector_stores_files:{VERSION}::" @@ -176,28 +176,28 @@ class FaissIndex(EmbeddingIndex): ) -class FaissVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPrivate): +class FaissVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtocolPrivate): def __init__(self, config: FaissVectorIOConfig, inference_api: Inference, files_api: Files | None) -> None: super().__init__(files_api=files_api, kvstore=None) self.config = config self.inference_api = inference_api - self.cache: dict[str, VectorDBWithIndex] = {} + self.cache: dict[str, VectorStoreWithIndex] = {} async def initialize(self) -> None: self.kvstore = await kvstore_impl(self.config.persistence) # Load existing banks from kvstore start_key = VECTOR_DBS_PREFIX end_key = f"{VECTOR_DBS_PREFIX}\xff" - stored_vector_dbs = await self.kvstore.values_in_range(start_key, end_key) + stored_vector_stores = await self.kvstore.values_in_range(start_key, end_key) - for vector_db_data in stored_vector_dbs: - vector_db = VectorDB.model_validate_json(vector_db_data) - index = VectorDBWithIndex( - vector_db, - await FaissIndex.create(vector_db.embedding_dimension, self.kvstore, vector_db.identifier), + for vector_store_data in stored_vector_stores: + vector_store = VectorStore.model_validate_json(vector_store_data) + index = VectorStoreWithIndex( + vector_store, + await FaissIndex.create(vector_store.embedding_dimension, self.kvstore, vector_store.identifier), self.inference_api, ) - self.cache[vector_db.identifier] = index + self.cache[vector_store.identifier] = index # Load existing OpenAI vector stores into the in-memory cache await self.initialize_openai_vector_stores() @@ -222,32 +222,31 @@ class FaissVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPr except Exception as e: return HealthResponse(status=HealthStatus.ERROR, message=f"Health check failed: {str(e)}") - async def register_vector_db(self, vector_db: VectorDB) -> None: + async def register_vector_store(self, vector_store: VectorStore) -> None: assert self.kvstore is not None - key = f"{VECTOR_DBS_PREFIX}{vector_db.identifier}" - await self.kvstore.set(key=key, value=vector_db.model_dump_json()) + key = f"{VECTOR_DBS_PREFIX}{vector_store.identifier}" + await self.kvstore.set(key=key, value=vector_store.model_dump_json()) # Store in cache - self.cache[vector_db.identifier] = VectorDBWithIndex( - vector_db=vector_db, - index=await FaissIndex.create(vector_db.embedding_dimension, self.kvstore, vector_db.identifier), + self.cache[vector_store.identifier] = VectorStoreWithIndex( + vector_store=vector_store, + index=await FaissIndex.create(vector_store.embedding_dimension, self.kvstore, vector_store.identifier), inference_api=self.inference_api, ) - async def list_vector_dbs(self) -> list[VectorDB]: - return [i.vector_db for i in self.cache.values()] + async def list_vector_stores(self) -> list[VectorStore]: + return [i.vector_store for i in self.cache.values()] - async def unregister_vector_db(self, vector_db_id: str) -> None: + async def unregister_vector_store(self, vector_store_id: str) -> None: assert self.kvstore is not None - if vector_db_id not in self.cache: - logger.warning(f"Vector DB {vector_db_id} not found") + if 
vector_store_id not in self.cache: return - await self.cache[vector_db_id].index.delete() - del self.cache[vector_db_id] - await self.kvstore.delete(f"{VECTOR_DBS_PREFIX}{vector_db_id}") + await self.cache[vector_store_id].index.delete() + del self.cache[vector_store_id] + await self.kvstore.delete(f"{VECTOR_DBS_PREFIX}{vector_store_id}") async def insert_chunks(self, vector_db_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None: index = self.cache.get(vector_db_id) diff --git a/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py b/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py index cfe23bde5..37294f173 100644 --- a/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +++ b/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py @@ -17,10 +17,10 @@ from numpy.typing import NDArray from llama_stack.apis.common.errors import VectorStoreNotFoundError from llama_stack.apis.files import Files from llama_stack.apis.inference import Inference -from llama_stack.apis.vector_dbs import VectorDB from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO +from llama_stack.apis.vector_stores import VectorStore from llama_stack.log import get_logger -from llama_stack.providers.datatypes import VectorDBsProtocolPrivate +from llama_stack.providers.datatypes import VectorStoresProtocolPrivate from llama_stack.providers.utils.kvstore import kvstore_impl from llama_stack.providers.utils.kvstore.api import KVStore from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin @@ -28,7 +28,7 @@ from llama_stack.providers.utils.memory.vector_store import ( RERANKER_TYPE_RRF, ChunkForDeletion, EmbeddingIndex, - VectorDBWithIndex, + VectorStoreWithIndex, ) from llama_stack.providers.utils.vector_io.vector_utils import WeightedInMemoryAggregator @@ -41,7 +41,7 @@ HYBRID_SEARCH = "hybrid" SEARCH_MODES = {VECTOR_SEARCH, KEYWORD_SEARCH, HYBRID_SEARCH} VERSION = "v3" -VECTOR_DBS_PREFIX = f"vector_dbs:sqlite_vec:{VERSION}::" +VECTOR_DBS_PREFIX = f"vector_stores:sqlite_vec:{VERSION}::" VECTOR_INDEX_PREFIX = f"vector_index:sqlite_vec:{VERSION}::" OPENAI_VECTOR_STORES_PREFIX = f"openai_vector_stores:sqlite_vec:{VERSION}::" OPENAI_VECTOR_STORES_FILES_PREFIX = f"openai_vector_stores_files:sqlite_vec:{VERSION}::" @@ -374,32 +374,32 @@ class SQLiteVecIndex(EmbeddingIndex): await asyncio.to_thread(_delete_chunks) -class SQLiteVecVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPrivate): +class SQLiteVecVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtocolPrivate): """ A VectorIO implementation using SQLite + sqlite_vec. - This class handles vector database registration (with metadata stored in a table named `vector_dbs`) - and creates a cache of VectorDBWithIndex instances (each wrapping a SQLiteVecIndex). + This class handles vector database registration (with metadata stored in a table named `vector_stores`) + and creates a cache of VectorStoreWithIndex instances (each wrapping a SQLiteVecIndex). 
""" def __init__(self, config, inference_api: Inference, files_api: Files | None) -> None: super().__init__(files_api=files_api, kvstore=None) self.config = config self.inference_api = inference_api - self.cache: dict[str, VectorDBWithIndex] = {} - self.vector_db_store = None + self.cache: dict[str, VectorStoreWithIndex] = {} + self.vector_store_table = None async def initialize(self) -> None: self.kvstore = await kvstore_impl(self.config.persistence) start_key = VECTOR_DBS_PREFIX end_key = f"{VECTOR_DBS_PREFIX}\xff" - stored_vector_dbs = await self.kvstore.values_in_range(start_key, end_key) - for db_json in stored_vector_dbs: - vector_db = VectorDB.model_validate_json(db_json) + stored_vector_stores = await self.kvstore.values_in_range(start_key, end_key) + for db_json in stored_vector_stores: + vector_store = VectorStore.model_validate_json(db_json) index = await SQLiteVecIndex.create( - vector_db.embedding_dimension, self.config.db_path, vector_db.identifier + vector_store.embedding_dimension, self.config.db_path, vector_store.identifier ) - self.cache[vector_db.identifier] = VectorDBWithIndex(vector_db, index, self.inference_api) + self.cache[vector_store.identifier] = VectorStoreWithIndex(vector_store, index, self.inference_api) # Load existing OpenAI vector stores into the in-memory cache await self.initialize_openai_vector_stores() @@ -408,63 +408,64 @@ class SQLiteVecVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtoc # Clean up mixin resources (file batch tasks) await super().shutdown() - async def list_vector_dbs(self) -> list[VectorDB]: - return [v.vector_db for v in self.cache.values()] + async def list_vector_stores(self) -> list[VectorStore]: + return [v.vector_store for v in self.cache.values()] - async def register_vector_db(self, vector_db: VectorDB) -> None: - index = await SQLiteVecIndex.create(vector_db.embedding_dimension, self.config.db_path, vector_db.identifier) - self.cache[vector_db.identifier] = VectorDBWithIndex(vector_db, index, self.inference_api) + async def register_vector_store(self, vector_store: VectorStore) -> None: + index = await SQLiteVecIndex.create( + vector_store.embedding_dimension, self.config.db_path, vector_store.identifier + ) + self.cache[vector_store.identifier] = VectorStoreWithIndex(vector_store, index, self.inference_api) - async def _get_and_cache_vector_db_index(self, vector_db_id: str) -> VectorDBWithIndex | None: - if vector_db_id in self.cache: - return self.cache[vector_db_id] + async def _get_and_cache_vector_store_index(self, vector_store_id: str) -> VectorStoreWithIndex | None: + if vector_store_id in self.cache: + return self.cache[vector_store_id] - if self.vector_db_store is None: - raise VectorStoreNotFoundError(vector_db_id) + if self.vector_store_table is None: + raise VectorStoreNotFoundError(vector_store_id) - vector_db = self.vector_db_store.get_vector_db(vector_db_id) - if not vector_db: - raise VectorStoreNotFoundError(vector_db_id) + vector_store = self.vector_store_table.get_vector_store(vector_store_id) + if not vector_store: + raise VectorStoreNotFoundError(vector_store_id) - index = VectorDBWithIndex( - vector_db=vector_db, + index = VectorStoreWithIndex( + vector_store=vector_store, index=SQLiteVecIndex( - dimension=vector_db.embedding_dimension, + dimension=vector_store.embedding_dimension, db_path=self.config.db_path, - bank_id=vector_db.identifier, + bank_id=vector_store.identifier, kvstore=self.kvstore, ), inference_api=self.inference_api, ) - self.cache[vector_db_id] = index + 
self.cache[vector_store_id] = index return index - async def unregister_vector_db(self, vector_db_id: str) -> None: - if vector_db_id not in self.cache: - logger.warning(f"Vector DB {vector_db_id} not found") + async def unregister_vector_store(self, vector_store_id: str) -> None: + if vector_store_id not in self.cache: return - await self.cache[vector_db_id].index.delete() - del self.cache[vector_db_id] + await self.cache[vector_store_id].index.delete() + del self.cache[vector_store_id] async def insert_chunks(self, vector_db_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None: - index = await self._get_and_cache_vector_db_index(vector_db_id) + index = await self._get_and_cache_vector_store_index(vector_db_id) if not index: raise VectorStoreNotFoundError(vector_db_id) - # The VectorDBWithIndex helper is expected to compute embeddings via the inference_api + # The VectorStoreWithIndex helper is expected to compute embeddings via the inference_api # and then call our index's add_chunks. await index.insert_chunks(chunks) async def query_chunks( self, vector_db_id: str, query: Any, params: dict[str, Any] | None = None ) -> QueryChunksResponse: - index = await self._get_and_cache_vector_db_index(vector_db_id) + index = await self._get_and_cache_vector_store_index(vector_db_id) if not index: raise VectorStoreNotFoundError(vector_db_id) return await index.query_chunks(query, params) async def delete_chunks(self, store_id: str, chunks_for_deletion: list[ChunkForDeletion]) -> None: """Delete chunks from a sqlite_vec index.""" - index = await self._get_and_cache_vector_db_index(store_id) + index = await self._get_and_cache_vector_store_index(store_id) if not index: raise VectorStoreNotFoundError(store_id) diff --git a/llama_stack/providers/registry/inference.py b/llama_stack/providers/registry/inference.py index 35afb296d..2e52e2d12 100644 --- a/llama_stack/providers/registry/inference.py +++ b/llama_stack/providers/registry/inference.py @@ -42,6 +42,7 @@ def available_providers() -> list[ProviderSpec]: # CrossEncoder depends on torchao.quantization pip_packages=[ "torch torchvision torchao>=0.12.0 --extra-index-url https://download.pytorch.org/whl/cpu", + "numpy tqdm transformers", "sentence-transformers --no-deps", # required by some SentenceTransformers architectures for tensor rearrange/merge ops "einops", diff --git a/llama_stack/providers/registry/tool_runtime.py b/llama_stack/providers/registry/tool_runtime.py index 39dc7fccd..514d9d0a0 100644 --- a/llama_stack/providers/registry/tool_runtime.py +++ b/llama_stack/providers/registry/tool_runtime.py @@ -7,33 +7,13 @@ from llama_stack.providers.datatypes import ( Api, - InlineProviderSpec, ProviderSpec, RemoteProviderSpec, ) -from llama_stack.providers.registry.vector_io import DEFAULT_VECTOR_IO_DEPS def available_providers() -> list[ProviderSpec]: return [ - InlineProviderSpec( - api=Api.tool_runtime, - provider_type="inline::rag-runtime", - pip_packages=DEFAULT_VECTOR_IO_DEPS - + [ - "tqdm", - "numpy", - "scikit-learn", - "scipy", - "nltk", - "sentencepiece", - "transformers", - ], - module="llama_stack.providers.inline.tool_runtime.rag", - config_class="llama_stack.providers.inline.tool_runtime.rag.config.RagToolRuntimeConfig", - api_dependencies=[Api.vector_io, Api.inference, Api.files], - description="RAG (Retrieval-Augmented Generation) tool runtime for document ingestion, chunking, and semantic search.", - ), RemoteProviderSpec( api=Api.tool_runtime, adapter_type="brave-search", diff --git 
a/llama_stack/providers/registry/vector_io.py b/llama_stack/providers/registry/vector_io.py index ff3b8486f..db81ea35d 100644 --- a/llama_stack/providers/registry/vector_io.py +++ b/llama_stack/providers/registry/vector_io.py @@ -119,7 +119,7 @@ Datasets that can fit in memory, frequent reads | Faiss | Optimized for speed, i #### Empirical Example Consider the histogram below in which 10,000 randomly generated strings were inserted -in batches of 100 into both Faiss and sqlite-vec using `client.tool_runtime.rag_tool.insert()`. +in batches of 100 into both Faiss and sqlite-vec. ```{image} ../../../../_static/providers/vector_io/write_time_comparison_sqlite-vec-faiss.png :alt: Comparison of SQLite-Vec and Faiss write times diff --git a/llama_stack/providers/remote/datasetio/nvidia/README.md b/llama_stack/providers/remote/datasetio/nvidia/README.md index 74e0895f4..da57d5550 100644 --- a/llama_stack/providers/remote/datasetio/nvidia/README.md +++ b/llama_stack/providers/remote/datasetio/nvidia/README.md @@ -20,7 +20,7 @@ This provider enables dataset management using NVIDIA's NeMo Customizer service. Build the NVIDIA environment: ```bash -llama stack build --distro nvidia --image-type venv +uv run llama stack list-deps nvidia | xargs -L1 uv pip install ``` ### Basic Usage using the LlamaStack Python Client diff --git a/llama_stack/providers/remote/inference/nvidia/NVIDIA.md b/llama_stack/providers/remote/inference/nvidia/NVIDIA.md index 692b9125b..f1a828413 100644 --- a/llama_stack/providers/remote/inference/nvidia/NVIDIA.md +++ b/llama_stack/providers/remote/inference/nvidia/NVIDIA.md @@ -18,7 +18,7 @@ This provider enables running inference using NVIDIA NIM. Build the NVIDIA environment: ```bash -llama stack build --distro nvidia --image-type venv +uv run llama stack list-deps nvidia | xargs -L1 uv pip install ``` ### Basic Usage using the LlamaStack Python Client diff --git a/llama_stack/providers/remote/inference/nvidia/__init__.py b/llama_stack/providers/remote/inference/nvidia/__init__.py index 1869cb748..b4926f33e 100644 --- a/llama_stack/providers/remote/inference/nvidia/__init__.py +++ b/llama_stack/providers/remote/inference/nvidia/__init__.py @@ -10,7 +10,7 @@ from .config import NVIDIAConfig async def get_adapter_impl(config: NVIDIAConfig, _deps) -> Inference: - # import dynamically so `llama stack build` does not fail due to missing dependencies + # import dynamically so `llama stack list-deps` does not fail due to missing dependencies from .nvidia import NVIDIAInferenceAdapter if not isinstance(config, NVIDIAConfig): diff --git a/llama_stack/providers/remote/post_training/nvidia/README.md b/llama_stack/providers/remote/post_training/nvidia/README.md index 9b088a615..789514b1e 100644 --- a/llama_stack/providers/remote/post_training/nvidia/README.md +++ b/llama_stack/providers/remote/post_training/nvidia/README.md @@ -22,7 +22,7 @@ This provider enables fine-tuning of LLMs using NVIDIA's NeMo Customizer service Build the NVIDIA environment: ```bash -llama stack build --distro nvidia --image-type venv +uv run llama stack list-deps nvidia | xargs -L1 uv pip install ``` ### Basic Usage using the LlamaStack Python Client diff --git a/llama_stack/providers/remote/safety/nvidia/README.md b/llama_stack/providers/remote/safety/nvidia/README.md index 784ab464f..e589afe84 100644 --- a/llama_stack/providers/remote/safety/nvidia/README.md +++ b/llama_stack/providers/remote/safety/nvidia/README.md @@ -19,7 +19,7 @@ This provider enables safety checks and guardrails for LLM interactions using 
NV Build the NVIDIA environment: ```bash -llama stack build --distro nvidia --image-type venv +uv run llama stack list-deps nvidia | xargs -L1 uv pip install ``` ### Basic Usage using the LlamaStack Python Client diff --git a/llama_stack/providers/remote/vector_io/chroma/chroma.py b/llama_stack/providers/remote/vector_io/chroma/chroma.py index 0aa728c32..2663ad43e 100644 --- a/llama_stack/providers/remote/vector_io/chroma/chroma.py +++ b/llama_stack/providers/remote/vector_io/chroma/chroma.py @@ -13,15 +13,15 @@ from numpy.typing import NDArray from llama_stack.apis.files import Files from llama_stack.apis.inference import Inference, InterleavedContent -from llama_stack.apis.vector_dbs import VectorDB from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO +from llama_stack.apis.vector_stores import VectorStore from llama_stack.log import get_logger -from llama_stack.providers.datatypes import VectorDBsProtocolPrivate +from llama_stack.providers.datatypes import VectorStoresProtocolPrivate from llama_stack.providers.inline.vector_io.chroma import ChromaVectorIOConfig as InlineChromaVectorIOConfig from llama_stack.providers.utils.kvstore import kvstore_impl from llama_stack.providers.utils.kvstore.api import KVStore from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin -from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorDBWithIndex +from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorStoreWithIndex from .config import ChromaVectorIOConfig as RemoteChromaVectorIOConfig @@ -30,7 +30,7 @@ log = get_logger(name=__name__, category="vector_io::chroma") ChromaClientType = chromadb.api.AsyncClientAPI | chromadb.api.ClientAPI VERSION = "v3" -VECTOR_DBS_PREFIX = f"vector_dbs:chroma:{VERSION}::" +VECTOR_DBS_PREFIX = f"vector_stores:chroma:{VERSION}::" VECTOR_INDEX_PREFIX = f"vector_index:chroma:{VERSION}::" OPENAI_VECTOR_STORES_PREFIX = f"openai_vector_stores:chroma:{VERSION}::" OPENAI_VECTOR_STORES_FILES_PREFIX = f"openai_vector_stores_files:chroma:{VERSION}::" @@ -114,7 +114,7 @@ class ChromaIndex(EmbeddingIndex): raise NotImplementedError("Hybrid search is not supported in Chroma") -class ChromaVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPrivate): +class ChromaVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtocolPrivate): def __init__( self, config: RemoteChromaVectorIOConfig | InlineChromaVectorIOConfig, @@ -127,11 +127,11 @@ class ChromaVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP self.inference_api = inference_api self.client = None self.cache = {} - self.vector_db_store = None + self.vector_store_table = None async def initialize(self) -> None: self.kvstore = await kvstore_impl(self.config.persistence) - self.vector_db_store = self.kvstore + self.vector_store_table = self.kvstore if isinstance(self.config, RemoteChromaVectorIOConfig): log.info(f"Connecting to Chroma server at: {self.config.url}") @@ -151,26 +151,26 @@ class ChromaVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP # Clean up mixin resources (file batch tasks) await super().shutdown() - async def register_vector_db(self, vector_db: VectorDB) -> None: + async def register_vector_store(self, vector_store: VectorStore) -> None: collection = await maybe_await( self.client.get_or_create_collection( - name=vector_db.identifier, metadata={"vector_db": vector_db.model_dump_json()} + 
name=vector_store.identifier, metadata={"vector_store": vector_store.model_dump_json()} ) ) - self.cache[vector_db.identifier] = VectorDBWithIndex( - vector_db, ChromaIndex(self.client, collection), self.inference_api + self.cache[vector_store.identifier] = VectorStoreWithIndex( + vector_store, ChromaIndex(self.client, collection), self.inference_api ) - async def unregister_vector_db(self, vector_db_id: str) -> None: - if vector_db_id not in self.cache: - log.warning(f"Vector DB {vector_db_id} not found") + async def unregister_vector_store(self, vector_store_id: str) -> None: + if vector_store_id not in self.cache: + log.warning(f"Vector DB {vector_store_id} not found") return - await self.cache[vector_db_id].index.delete() - del self.cache[vector_db_id] + await self.cache[vector_store_id].index.delete() + del self.cache[vector_store_id] async def insert_chunks(self, vector_db_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None: - index = await self._get_and_cache_vector_db_index(vector_db_id) + index = await self._get_and_cache_vector_store_index(vector_db_id) if index is None: raise ValueError(f"Vector DB {vector_db_id} not found in Chroma") @@ -179,30 +179,30 @@ class ChromaVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP async def query_chunks( self, vector_db_id: str, query: InterleavedContent, params: dict[str, Any] | None = None ) -> QueryChunksResponse: - index = await self._get_and_cache_vector_db_index(vector_db_id) + index = await self._get_and_cache_vector_store_index(vector_db_id) if index is None: raise ValueError(f"Vector DB {vector_db_id} not found in Chroma") return await index.query_chunks(query, params) - async def _get_and_cache_vector_db_index(self, vector_db_id: str) -> VectorDBWithIndex: - if vector_db_id in self.cache: - return self.cache[vector_db_id] + async def _get_and_cache_vector_store_index(self, vector_store_id: str) -> VectorStoreWithIndex: + if vector_store_id in self.cache: + return self.cache[vector_store_id] - vector_db = await self.vector_db_store.get_vector_db(vector_db_id) - if not vector_db: - raise ValueError(f"Vector DB {vector_db_id} not found in Llama Stack") - collection = await maybe_await(self.client.get_collection(vector_db_id)) + vector_store = await self.vector_store_table.get_vector_store(vector_store_id) + if not vector_store: + raise ValueError(f"Vector DB {vector_store_id} not found in Llama Stack") + collection = await maybe_await(self.client.get_collection(vector_store_id)) if not collection: - raise ValueError(f"Vector DB {vector_db_id} not found in Chroma") - index = VectorDBWithIndex(vector_db, ChromaIndex(self.client, collection), self.inference_api) - self.cache[vector_db_id] = index + raise ValueError(f"Vector DB {vector_store_id} not found in Chroma") + index = VectorStoreWithIndex(vector_store, ChromaIndex(self.client, collection), self.inference_api) + self.cache[vector_store_id] = index return index async def delete_chunks(self, store_id: str, chunks_for_deletion: list[ChunkForDeletion]) -> None: """Delete chunks from a Chroma vector store.""" - index = await self._get_and_cache_vector_db_index(store_id) + index = await self._get_and_cache_vector_store_index(store_id) if not index: raise ValueError(f"Vector DB {store_id} not found") diff --git a/llama_stack/providers/remote/vector_io/milvus/milvus.py b/llama_stack/providers/remote/vector_io/milvus/milvus.py index d7c34163d..cccf13816 100644 --- a/llama_stack/providers/remote/vector_io/milvus/milvus.py +++ 
b/llama_stack/providers/remote/vector_io/milvus/milvus.py @@ -14,10 +14,10 @@ from pymilvus import AnnSearchRequest, DataType, Function, FunctionType, MilvusC from llama_stack.apis.common.errors import VectorStoreNotFoundError from llama_stack.apis.files import Files from llama_stack.apis.inference import Inference, InterleavedContent -from llama_stack.apis.vector_dbs import VectorDB from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO +from llama_stack.apis.vector_stores import VectorStore from llama_stack.log import get_logger -from llama_stack.providers.datatypes import VectorDBsProtocolPrivate +from llama_stack.providers.datatypes import VectorStoresProtocolPrivate from llama_stack.providers.inline.vector_io.milvus import MilvusVectorIOConfig as InlineMilvusVectorIOConfig from llama_stack.providers.utils.kvstore import kvstore_impl from llama_stack.providers.utils.kvstore.api import KVStore @@ -26,7 +26,7 @@ from llama_stack.providers.utils.memory.vector_store import ( RERANKER_TYPE_WEIGHTED, ChunkForDeletion, EmbeddingIndex, - VectorDBWithIndex, + VectorStoreWithIndex, ) from llama_stack.providers.utils.vector_io.vector_utils import sanitize_collection_name @@ -35,7 +35,7 @@ from .config import MilvusVectorIOConfig as RemoteMilvusVectorIOConfig logger = get_logger(name=__name__, category="vector_io::milvus") VERSION = "v3" -VECTOR_DBS_PREFIX = f"vector_dbs:milvus:{VERSION}::" +VECTOR_DBS_PREFIX = f"vector_stores:milvus:{VERSION}::" VECTOR_INDEX_PREFIX = f"vector_index:milvus:{VERSION}::" OPENAI_VECTOR_STORES_PREFIX = f"openai_vector_stores:milvus:{VERSION}::" OPENAI_VECTOR_STORES_FILES_PREFIX = f"openai_vector_stores_files:milvus:{VERSION}::" @@ -261,7 +261,7 @@ class MilvusIndex(EmbeddingIndex): raise -class MilvusVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPrivate): +class MilvusVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtocolPrivate): def __init__( self, config: RemoteMilvusVectorIOConfig | InlineMilvusVectorIOConfig, @@ -273,28 +273,28 @@ class MilvusVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP self.cache = {} self.client = None self.inference_api = inference_api - self.vector_db_store = None + self.vector_store_table = None self.metadata_collection_name = "openai_vector_stores_metadata" async def initialize(self) -> None: self.kvstore = await kvstore_impl(self.config.persistence) start_key = VECTOR_DBS_PREFIX end_key = f"{VECTOR_DBS_PREFIX}\xff" - stored_vector_dbs = await self.kvstore.values_in_range(start_key, end_key) + stored_vector_stores = await self.kvstore.values_in_range(start_key, end_key) - for vector_db_data in stored_vector_dbs: - vector_db = VectorDB.model_validate_json(vector_db_data) - index = VectorDBWithIndex( - vector_db, + for vector_store_data in stored_vector_stores: + vector_store = VectorStore.model_validate_json(vector_store_data) + index = VectorStoreWithIndex( + vector_store, index=MilvusIndex( client=self.client, - collection_name=vector_db.identifier, + collection_name=vector_store.identifier, consistency_level=self.config.consistency_level, kvstore=self.kvstore, ), inference_api=self.inference_api, ) - self.cache[vector_db.identifier] = index + self.cache[vector_store.identifier] = index if isinstance(self.config, RemoteMilvusVectorIOConfig): logger.info(f"Connecting to Milvus server at {self.config.uri}") self.client = MilvusClient(**self.config.model_dump(exclude_none=True)) @@ -311,45 +311,45 @@ class MilvusVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, 
VectorDBsProtocolP # Clean up mixin resources (file batch tasks) await super().shutdown() - async def register_vector_db(self, vector_db: VectorDB) -> None: + async def register_vector_store(self, vector_store: VectorStore) -> None: if isinstance(self.config, RemoteMilvusVectorIOConfig): consistency_level = self.config.consistency_level else: consistency_level = "Strong" - index = VectorDBWithIndex( - vector_db=vector_db, - index=MilvusIndex(self.client, vector_db.identifier, consistency_level=consistency_level), + index = VectorStoreWithIndex( + vector_store=vector_store, + index=MilvusIndex(self.client, vector_store.identifier, consistency_level=consistency_level), inference_api=self.inference_api, ) - self.cache[vector_db.identifier] = index + self.cache[vector_store.identifier] = index - async def _get_and_cache_vector_db_index(self, vector_db_id: str) -> VectorDBWithIndex | None: - if vector_db_id in self.cache: - return self.cache[vector_db_id] + async def _get_and_cache_vector_store_index(self, vector_store_id: str) -> VectorStoreWithIndex | None: + if vector_store_id in self.cache: + return self.cache[vector_store_id] - if self.vector_db_store is None: - raise VectorStoreNotFoundError(vector_db_id) + if self.vector_store_table is None: + raise VectorStoreNotFoundError(vector_store_id) - vector_db = await self.vector_db_store.get_vector_db(vector_db_id) - if not vector_db: - raise VectorStoreNotFoundError(vector_db_id) + vector_store = await self.vector_store_table.get_vector_store(vector_store_id) + if not vector_store: + raise VectorStoreNotFoundError(vector_store_id) - index = VectorDBWithIndex( - vector_db=vector_db, - index=MilvusIndex(client=self.client, collection_name=vector_db.identifier, kvstore=self.kvstore), + index = VectorStoreWithIndex( + vector_store=vector_store, + index=MilvusIndex(client=self.client, collection_name=vector_store.identifier, kvstore=self.kvstore), inference_api=self.inference_api, ) - self.cache[vector_db_id] = index + self.cache[vector_store_id] = index return index - async def unregister_vector_db(self, vector_db_id: str) -> None: - if vector_db_id in self.cache: - await self.cache[vector_db_id].index.delete() - del self.cache[vector_db_id] + async def unregister_vector_store(self, vector_store_id: str) -> None: + if vector_store_id in self.cache: + await self.cache[vector_store_id].index.delete() + del self.cache[vector_store_id] async def insert_chunks(self, vector_db_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None: - index = await self._get_and_cache_vector_db_index(vector_db_id) + index = await self._get_and_cache_vector_store_index(vector_db_id) if not index: raise VectorStoreNotFoundError(vector_db_id) @@ -358,14 +358,14 @@ class MilvusVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP async def query_chunks( self, vector_db_id: str, query: InterleavedContent, params: dict[str, Any] | None = None ) -> QueryChunksResponse: - index = await self._get_and_cache_vector_db_index(vector_db_id) + index = await self._get_and_cache_vector_store_index(vector_db_id) if not index: raise VectorStoreNotFoundError(vector_db_id) return await index.query_chunks(query, params) async def delete_chunks(self, store_id: str, chunks_for_deletion: list[ChunkForDeletion]) -> None: """Delete a chunk from a milvus vector store.""" - index = await self._get_and_cache_vector_db_index(store_id) + index = await self._get_and_cache_vector_store_index(store_id) if not index: raise VectorStoreNotFoundError(store_id) diff --git 
a/llama_stack/providers/remote/vector_io/pgvector/pgvector.py b/llama_stack/providers/remote/vector_io/pgvector/pgvector.py index 703a47843..f28bd3cd9 100644 --- a/llama_stack/providers/remote/vector_io/pgvector/pgvector.py +++ b/llama_stack/providers/remote/vector_io/pgvector/pgvector.py @@ -16,15 +16,15 @@ from pydantic import BaseModel, TypeAdapter from llama_stack.apis.common.errors import VectorStoreNotFoundError from llama_stack.apis.files import Files from llama_stack.apis.inference import Inference, InterleavedContent -from llama_stack.apis.vector_dbs import VectorDB from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO +from llama_stack.apis.vector_stores import VectorStore from llama_stack.log import get_logger -from llama_stack.providers.datatypes import VectorDBsProtocolPrivate +from llama_stack.providers.datatypes import VectorStoresProtocolPrivate from llama_stack.providers.utils.inference.prompt_adapter import interleaved_content_as_str from llama_stack.providers.utils.kvstore import kvstore_impl from llama_stack.providers.utils.kvstore.api import KVStore from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin -from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorDBWithIndex +from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorStoreWithIndex from llama_stack.providers.utils.vector_io.vector_utils import WeightedInMemoryAggregator, sanitize_collection_name from .config import PGVectorVectorIOConfig @@ -32,7 +32,7 @@ from .config import PGVectorVectorIOConfig log = get_logger(name=__name__, category="vector_io::pgvector") VERSION = "v3" -VECTOR_DBS_PREFIX = f"vector_dbs:pgvector:{VERSION}::" +VECTOR_DBS_PREFIX = f"vector_stores:pgvector:{VERSION}::" VECTOR_INDEX_PREFIX = f"vector_index:pgvector:{VERSION}::" OPENAI_VECTOR_STORES_PREFIX = f"openai_vector_stores:pgvector:{VERSION}::" OPENAI_VECTOR_STORES_FILES_PREFIX = f"openai_vector_stores_files:pgvector:{VERSION}::" @@ -79,13 +79,13 @@ class PGVectorIndex(EmbeddingIndex): def __init__( self, - vector_db: VectorDB, + vector_store: VectorStore, dimension: int, conn: psycopg2.extensions.connection, kvstore: KVStore | None = None, distance_metric: str = "COSINE", ): - self.vector_db = vector_db + self.vector_store = vector_store self.dimension = dimension self.conn = conn self.kvstore = kvstore @@ -97,9 +97,9 @@ class PGVectorIndex(EmbeddingIndex): try: with self.conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: # Sanitize the table name by replacing hyphens with underscores - # SQL doesn't allow hyphens in table names, and vector_db.identifier may contain hyphens + # SQL doesn't allow hyphens in table names, and vector_store.identifier may contain hyphens # when created with patterns like "test-vector-db-{uuid4()}" - sanitized_identifier = sanitize_collection_name(self.vector_db.identifier) + sanitized_identifier = sanitize_collection_name(self.vector_store.identifier) self.table_name = f"vs_{sanitized_identifier}" cur.execute( @@ -122,8 +122,8 @@ class PGVectorIndex(EmbeddingIndex): """ ) except Exception as e: - log.exception(f"Error creating PGVectorIndex for vector_db: {self.vector_db.identifier}") - raise RuntimeError(f"Error creating PGVectorIndex for vector_db: {self.vector_db.identifier}") from e + log.exception(f"Error creating PGVectorIndex for vector_store: {self.vector_store.identifier}") + raise RuntimeError(f"Error creating PGVectorIndex for 
vector_store: {self.vector_store.identifier}") from e async def add_chunks(self, chunks: list[Chunk], embeddings: NDArray): assert len(chunks) == len(embeddings), ( @@ -323,7 +323,7 @@ class PGVectorIndex(EmbeddingIndex): ) -class PGVectorVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPrivate): +class PGVectorVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtocolPrivate): def __init__( self, config: PGVectorVectorIOConfig, inference_api: Inference, files_api: Files | None = None ) -> None: @@ -332,7 +332,7 @@ class PGVectorVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtoco self.inference_api = inference_api self.conn = None self.cache = {} - self.vector_db_store = None + self.vector_store_table = None self.metadata_collection_name = "openai_vector_stores_metadata" async def initialize(self) -> None: @@ -375,59 +375,59 @@ class PGVectorVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtoco # Clean up mixin resources (file batch tasks) await super().shutdown() - async def register_vector_db(self, vector_db: VectorDB) -> None: + async def register_vector_store(self, vector_store: VectorStore) -> None: # Persist vector DB metadata in the KV store assert self.kvstore is not None # Upsert model metadata in Postgres - upsert_models(self.conn, [(vector_db.identifier, vector_db)]) + upsert_models(self.conn, [(vector_store.identifier, vector_store)]) # Create and cache the PGVector index table for the vector DB pgvector_index = PGVectorIndex( - vector_db=vector_db, dimension=vector_db.embedding_dimension, conn=self.conn, kvstore=self.kvstore + vector_store=vector_store, dimension=vector_store.embedding_dimension, conn=self.conn, kvstore=self.kvstore ) await pgvector_index.initialize() - index = VectorDBWithIndex(vector_db, index=pgvector_index, inference_api=self.inference_api) - self.cache[vector_db.identifier] = index + index = VectorStoreWithIndex(vector_store, index=pgvector_index, inference_api=self.inference_api) + self.cache[vector_store.identifier] = index - async def unregister_vector_db(self, vector_db_id: str) -> None: + async def unregister_vector_store(self, vector_store_id: str) -> None: # Remove provider index and cache - if vector_db_id in self.cache: - await self.cache[vector_db_id].index.delete() - del self.cache[vector_db_id] + if vector_store_id in self.cache: + await self.cache[vector_store_id].index.delete() + del self.cache[vector_store_id] # Delete vector DB metadata from KV store assert self.kvstore is not None - await self.kvstore.delete(key=f"{VECTOR_DBS_PREFIX}{vector_db_id}") + await self.kvstore.delete(key=f"{VECTOR_DBS_PREFIX}{vector_store_id}") async def insert_chunks(self, vector_db_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None: - index = await self._get_and_cache_vector_db_index(vector_db_id) + index = await self._get_and_cache_vector_store_index(vector_db_id) await index.insert_chunks(chunks) async def query_chunks( self, vector_db_id: str, query: InterleavedContent, params: dict[str, Any] | None = None ) -> QueryChunksResponse: - index = await self._get_and_cache_vector_db_index(vector_db_id) + index = await self._get_and_cache_vector_store_index(vector_db_id) return await index.query_chunks(query, params) - async def _get_and_cache_vector_db_index(self, vector_db_id: str) -> VectorDBWithIndex: - if vector_db_id in self.cache: - return self.cache[vector_db_id] + async def _get_and_cache_vector_store_index(self, vector_store_id: str) -> VectorStoreWithIndex: + if vector_store_id 
in self.cache: + return self.cache[vector_store_id] - if self.vector_db_store is None: - raise VectorStoreNotFoundError(vector_db_id) + if self.vector_store_table is None: + raise VectorStoreNotFoundError(vector_store_id) - vector_db = await self.vector_db_store.get_vector_db(vector_db_id) - if not vector_db: - raise VectorStoreNotFoundError(vector_db_id) + vector_store = await self.vector_store_table.get_vector_store(vector_store_id) + if not vector_store: + raise VectorStoreNotFoundError(vector_store_id) - index = PGVectorIndex(vector_db, vector_db.embedding_dimension, self.conn) + index = PGVectorIndex(vector_store, vector_store.embedding_dimension, self.conn) await index.initialize() - self.cache[vector_db_id] = VectorDBWithIndex(vector_db, index, self.inference_api) - return self.cache[vector_db_id] + self.cache[vector_store_id] = VectorStoreWithIndex(vector_store, index, self.inference_api) + return self.cache[vector_store_id] async def delete_chunks(self, store_id: str, chunks_for_deletion: list[ChunkForDeletion]) -> None: """Delete a chunk from a PostgreSQL vector store.""" - index = await self._get_and_cache_vector_db_index(store_id) + index = await self._get_and_cache_vector_store_index(store_id) if not index: raise VectorStoreNotFoundError(store_id) diff --git a/llama_stack/providers/remote/vector_io/qdrant/qdrant.py b/llama_stack/providers/remote/vector_io/qdrant/qdrant.py index 6838d69e9..93d0894a6 100644 --- a/llama_stack/providers/remote/vector_io/qdrant/qdrant.py +++ b/llama_stack/providers/remote/vector_io/qdrant/qdrant.py @@ -16,7 +16,6 @@ from qdrant_client.models import PointStruct from llama_stack.apis.common.errors import VectorStoreNotFoundError from llama_stack.apis.files import Files from llama_stack.apis.inference import Inference, InterleavedContent -from llama_stack.apis.vector_dbs import VectorDB from llama_stack.apis.vector_io import ( Chunk, QueryChunksResponse, @@ -24,12 +23,13 @@ from llama_stack.apis.vector_io import ( VectorStoreChunkingStrategy, VectorStoreFileObject, ) +from llama_stack.apis.vector_stores import VectorStore from llama_stack.log import get_logger -from llama_stack.providers.datatypes import VectorDBsProtocolPrivate +from llama_stack.providers.datatypes import VectorStoresProtocolPrivate from llama_stack.providers.inline.vector_io.qdrant import QdrantVectorIOConfig as InlineQdrantVectorIOConfig from llama_stack.providers.utils.kvstore import kvstore_impl from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin -from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorDBWithIndex +from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorStoreWithIndex from .config import QdrantVectorIOConfig as RemoteQdrantVectorIOConfig @@ -38,7 +38,7 @@ CHUNK_ID_KEY = "_chunk_id" # KV store prefixes for vector databases VERSION = "v3" -VECTOR_DBS_PREFIX = f"vector_dbs:qdrant:{VERSION}::" +VECTOR_DBS_PREFIX = f"vector_stores:qdrant:{VERSION}::" def convert_id(_id: str) -> str: @@ -145,7 +145,7 @@ class QdrantIndex(EmbeddingIndex): await self.client.delete_collection(collection_name=self.collection_name) -class QdrantVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPrivate): +class QdrantVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtocolPrivate): def __init__( self, config: RemoteQdrantVectorIOConfig | InlineQdrantVectorIOConfig, @@ -157,7 +157,7 @@ class 
QdrantVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP self.client: AsyncQdrantClient = None self.cache = {} self.inference_api = inference_api - self.vector_db_store = None + self.vector_store_table = None self._qdrant_lock = asyncio.Lock() async def initialize(self) -> None: @@ -167,12 +167,14 @@ class QdrantVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP start_key = VECTOR_DBS_PREFIX end_key = f"{VECTOR_DBS_PREFIX}\xff" - stored_vector_dbs = await self.kvstore.values_in_range(start_key, end_key) + stored_vector_stores = await self.kvstore.values_in_range(start_key, end_key) - for vector_db_data in stored_vector_dbs: - vector_db = VectorDB.model_validate_json(vector_db_data) - index = VectorDBWithIndex(vector_db, QdrantIndex(self.client, vector_db.identifier), self.inference_api) - self.cache[vector_db.identifier] = index + for vector_store_data in stored_vector_stores: + vector_store = VectorStore.model_validate_json(vector_store_data) + index = VectorStoreWithIndex( + vector_store, QdrantIndex(self.client, vector_store.identifier), self.inference_api + ) + self.cache[vector_store.identifier] = index self.openai_vector_stores = await self._load_openai_vector_stores() async def shutdown(self) -> None: @@ -180,46 +182,48 @@ class QdrantVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP # Clean up mixin resources (file batch tasks) await super().shutdown() - async def register_vector_db(self, vector_db: VectorDB) -> None: + async def register_vector_store(self, vector_store: VectorStore) -> None: assert self.kvstore is not None - key = f"{VECTOR_DBS_PREFIX}{vector_db.identifier}" - await self.kvstore.set(key=key, value=vector_db.model_dump_json()) + key = f"{VECTOR_DBS_PREFIX}{vector_store.identifier}" + await self.kvstore.set(key=key, value=vector_store.model_dump_json()) - index = VectorDBWithIndex( - vector_db=vector_db, index=QdrantIndex(self.client, vector_db.identifier), inference_api=self.inference_api - ) - - self.cache[vector_db.identifier] = index - - async def unregister_vector_db(self, vector_db_id: str) -> None: - if vector_db_id in self.cache: - await self.cache[vector_db_id].index.delete() - del self.cache[vector_db_id] - - assert self.kvstore is not None - await self.kvstore.delete(f"{VECTOR_DBS_PREFIX}{vector_db_id}") - - async def _get_and_cache_vector_db_index(self, vector_db_id: str) -> VectorDBWithIndex | None: - if vector_db_id in self.cache: - return self.cache[vector_db_id] - - if self.vector_db_store is None: - raise ValueError(f"Vector DB not found {vector_db_id}") - - vector_db = await self.vector_db_store.get_vector_db(vector_db_id) - if not vector_db: - raise VectorStoreNotFoundError(vector_db_id) - - index = VectorDBWithIndex( - vector_db=vector_db, - index=QdrantIndex(client=self.client, collection_name=vector_db.identifier), + index = VectorStoreWithIndex( + vector_store=vector_store, + index=QdrantIndex(self.client, vector_store.identifier), inference_api=self.inference_api, ) - self.cache[vector_db_id] = index + + self.cache[vector_store.identifier] = index + + async def unregister_vector_store(self, vector_store_id: str) -> None: + if vector_store_id in self.cache: + await self.cache[vector_store_id].index.delete() + del self.cache[vector_store_id] + + assert self.kvstore is not None + await self.kvstore.delete(f"{VECTOR_DBS_PREFIX}{vector_store_id}") + + async def _get_and_cache_vector_store_index(self, vector_store_id: str) -> VectorStoreWithIndex | None: + if vector_store_id in self.cache: + 
return self.cache[vector_store_id] + + if self.vector_store_table is None: + raise VectorStoreNotFoundError(vector_store_id) + + vector_store = await self.vector_store_table.get_vector_store(vector_store_id) + if not vector_store: + raise VectorStoreNotFoundError(vector_store_id) + + index = VectorStoreWithIndex( + vector_store=vector_store, + index=QdrantIndex(client=self.client, collection_name=vector_store.identifier), + inference_api=self.inference_api, + ) + self.cache[vector_store_id] = index return index async def insert_chunks(self, vector_db_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None: - index = await self._get_and_cache_vector_db_index(vector_db_id) + index = await self._get_and_cache_vector_store_index(vector_db_id) if not index: raise VectorStoreNotFoundError(vector_db_id) @@ -228,7 +232,7 @@ class QdrantVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP async def query_chunks( self, vector_db_id: str, query: InterleavedContent, params: dict[str, Any] | None = None ) -> QueryChunksResponse: - index = await self._get_and_cache_vector_db_index(vector_db_id) + index = await self._get_and_cache_vector_store_index(vector_db_id) if not index: raise VectorStoreNotFoundError(vector_db_id) @@ -249,7 +253,7 @@ class QdrantVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP async def delete_chunks(self, store_id: str, chunks_for_deletion: list[ChunkForDeletion]) -> None: """Delete chunks from a Qdrant vector store.""" - index = await self._get_and_cache_vector_db_index(store_id) + index = await self._get_and_cache_vector_store_index(store_id) if not index: raise ValueError(f"Vector DB {store_id} not found") diff --git a/llama_stack/providers/remote/vector_io/weaviate/weaviate.py b/llama_stack/providers/remote/vector_io/weaviate/weaviate.py index 8e7eb7267..66922aa3f 100644 --- a/llama_stack/providers/remote/vector_io/weaviate/weaviate.py +++ b/llama_stack/providers/remote/vector_io/weaviate/weaviate.py @@ -16,11 +16,11 @@ from llama_stack.apis.common.content_types import InterleavedContent from llama_stack.apis.common.errors import VectorStoreNotFoundError from llama_stack.apis.files import Files from llama_stack.apis.inference import Inference -from llama_stack.apis.vector_dbs import VectorDB from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO +from llama_stack.apis.vector_stores import VectorStore from llama_stack.core.request_headers import NeedsRequestProviderData from llama_stack.log import get_logger -from llama_stack.providers.datatypes import VectorDBsProtocolPrivate +from llama_stack.providers.datatypes import VectorStoresProtocolPrivate from llama_stack.providers.utils.kvstore import kvstore_impl from llama_stack.providers.utils.kvstore.api import KVStore from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin @@ -28,7 +28,7 @@ from llama_stack.providers.utils.memory.vector_store import ( RERANKER_TYPE_RRF, ChunkForDeletion, EmbeddingIndex, - VectorDBWithIndex, + VectorStoreWithIndex, ) from llama_stack.providers.utils.vector_io.vector_utils import sanitize_collection_name @@ -37,7 +37,7 @@ from .config import WeaviateVectorIOConfig log = get_logger(name=__name__, category="vector_io::weaviate") VERSION = "v3" -VECTOR_DBS_PREFIX = f"vector_dbs:weaviate:{VERSION}::" +VECTOR_DBS_PREFIX = f"vector_stores:weaviate:{VERSION}::" VECTOR_INDEX_PREFIX = f"vector_index:weaviate:{VERSION}::" OPENAI_VECTOR_STORES_PREFIX =
f"openai_vector_stores:weaviate:{VERSION}::" OPENAI_VECTOR_STORES_FILES_PREFIX = f"openai_vector_stores_files:weaviate:{VERSION}::" @@ -257,14 +257,14 @@ class WeaviateIndex(EmbeddingIndex): return QueryChunksResponse(chunks=chunks, scores=scores) -class WeaviateVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, NeedsRequestProviderData, VectorDBsProtocolPrivate): +class WeaviateVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, NeedsRequestProviderData, VectorStoresProtocolPrivate): def __init__(self, config: WeaviateVectorIOConfig, inference_api: Inference, files_api: Files | None) -> None: super().__init__(files_api=files_api, kvstore=None) self.config = config self.inference_api = inference_api self.client_cache = {} self.cache = {} - self.vector_db_store = None + self.vector_store_table = None self.metadata_collection_name = "openai_vector_stores_metadata" def _get_client(self) -> weaviate.WeaviateClient: @@ -300,11 +300,11 @@ class WeaviateVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, NeedsRequestProv end_key = f"{VECTOR_DBS_PREFIX}\xff" stored = await self.kvstore.values_in_range(start_key, end_key) for raw in stored: - vector_db = VectorDB.model_validate_json(raw) + vector_store = VectorStore.model_validate_json(raw) client = self._get_client() - idx = WeaviateIndex(client=client, collection_name=vector_db.identifier, kvstore=self.kvstore) - self.cache[vector_db.identifier] = VectorDBWithIndex( - vector_db=vector_db, index=idx, inference_api=self.inference_api + idx = WeaviateIndex(client=client, collection_name=vector_store.identifier, kvstore=self.kvstore) + self.cache[vector_store.identifier] = VectorStoreWithIndex( + vector_store=vector_store, index=idx, inference_api=self.inference_api ) # Load OpenAI vector stores metadata into cache @@ -316,9 +316,9 @@ class WeaviateVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, NeedsRequestProv # Clean up mixin resources (file batch tasks) await super().shutdown() - async def register_vector_db(self, vector_db: VectorDB) -> None: + async def register_vector_store(self, vector_store: VectorStore) -> None: client = self._get_client() - sanitized_collection_name = sanitize_collection_name(vector_db.identifier, weaviate_format=True) + sanitized_collection_name = sanitize_collection_name(vector_store.identifier, weaviate_format=True) # Create collection if it doesn't exist if not client.collections.exists(sanitized_collection_name): client.collections.create( @@ -329,45 +329,45 @@ class WeaviateVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, NeedsRequestProv ], ) - self.cache[vector_db.identifier] = VectorDBWithIndex( - vector_db, WeaviateIndex(client=client, collection_name=sanitized_collection_name), self.inference_api + self.cache[vector_store.identifier] = VectorStoreWithIndex( + vector_store, WeaviateIndex(client=client, collection_name=sanitized_collection_name), self.inference_api ) - async def unregister_vector_db(self, vector_db_id: str) -> None: + async def unregister_vector_store(self, vector_store_id: str) -> None: client = self._get_client() - sanitized_collection_name = sanitize_collection_name(vector_db_id, weaviate_format=True) - if vector_db_id not in self.cache or client.collections.exists(sanitized_collection_name) is False: + sanitized_collection_name = sanitize_collection_name(vector_store_id, weaviate_format=True) + if vector_store_id not in self.cache or client.collections.exists(sanitized_collection_name) is False: return client.collections.delete(sanitized_collection_name) - await 
self.cache[vector_db_id].index.delete() - del self.cache[vector_db_id] + await self.cache[vector_store_id].index.delete() + del self.cache[vector_store_id] - async def _get_and_cache_vector_db_index(self, vector_db_id: str) -> VectorDBWithIndex | None: - if vector_db_id in self.cache: - return self.cache[vector_db_id] + async def _get_and_cache_vector_store_index(self, vector_store_id: str) -> VectorStoreWithIndex | None: + if vector_store_id in self.cache: + return self.cache[vector_store_id] - if self.vector_db_store is None: - raise VectorStoreNotFoundError(vector_db_id) + if self.vector_store_table is None: + raise VectorStoreNotFoundError(vector_store_id) - vector_db = await self.vector_db_store.get_vector_db(vector_db_id) - if not vector_db: - raise VectorStoreNotFoundError(vector_db_id) + vector_store = await self.vector_store_table.get_vector_store(vector_store_id) + if not vector_store: + raise VectorStoreNotFoundError(vector_store_id) client = self._get_client() - sanitized_collection_name = sanitize_collection_name(vector_db.identifier, weaviate_format=True) + sanitized_collection_name = sanitize_collection_name(vector_store.identifier, weaviate_format=True) if not client.collections.exists(sanitized_collection_name): raise ValueError(f"Collection with name `{sanitized_collection_name}` not found") - index = VectorDBWithIndex( - vector_db=vector_db, - index=WeaviateIndex(client=client, collection_name=vector_db.identifier), + index = VectorStoreWithIndex( + vector_store=vector_store, + index=WeaviateIndex(client=client, collection_name=vector_store.identifier), inference_api=self.inference_api, ) - self.cache[vector_db_id] = index + self.cache[vector_store_id] = index return index async def insert_chunks(self, vector_db_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None: - index = await self._get_and_cache_vector_db_index(vector_db_id) + index = await self._get_and_cache_vector_store_index(vector_db_id) if not index: raise VectorStoreNotFoundError(vector_db_id) @@ -376,14 +376,14 @@ class WeaviateVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, NeedsRequestProv async def query_chunks( self, vector_db_id: str, query: InterleavedContent, params: dict[str, Any] | None = None ) -> QueryChunksResponse: - index = await self._get_and_cache_vector_db_index(vector_db_id) + index = await self._get_and_cache_vector_store_index(vector_db_id) if not index: raise VectorStoreNotFoundError(vector_db_id) return await index.query_chunks(query, params) async def delete_chunks(self, store_id: str, chunks_for_deletion: list[ChunkForDeletion]) -> None: - index = await self._get_and_cache_vector_db_index(store_id) + index = await self._get_and_cache_vector_store_index(store_id) if not index: raise ValueError(f"Vector DB {store_id} not found") diff --git a/llama_stack/providers/utils/memory/openai_vector_store_mixin.py b/llama_stack/providers/utils/memory/openai_vector_store_mixin.py index 7806d98c1..6629fb965 100644 --- a/llama_stack/providers/utils/memory/openai_vector_store_mixin.py +++ b/llama_stack/providers/utils/memory/openai_vector_store_mixin.py @@ -17,7 +17,6 @@ from pydantic import TypeAdapter from llama_stack.apis.common.errors import VectorStoreNotFoundError from llama_stack.apis.files import Files, OpenAIFileObject -from llama_stack.apis.vector_dbs import VectorDB from llama_stack.apis.vector_io import ( Chunk, OpenAICreateVectorStoreFileBatchRequestWithExtraBody, @@ -43,6 +42,7 @@ from llama_stack.apis.vector_io import ( VectorStoreSearchResponse, 
VectorStoreSearchResponsePage, ) +from llama_stack.apis.vector_stores import VectorStore from llama_stack.core.id_generation import generate_object_id from llama_stack.log import get_logger from llama_stack.providers.utils.kvstore.api import KVStore @@ -63,7 +63,7 @@ MAX_CONCURRENT_FILES_PER_BATCH = 3 # Maximum concurrent file processing within FILE_BATCH_CHUNK_SIZE = 10 # Process files in chunks of this size VERSION = "v3" -VECTOR_DBS_PREFIX = f"vector_dbs:{VERSION}::" +VECTOR_DBS_PREFIX = f"vector_stores:{VERSION}::" OPENAI_VECTOR_STORES_PREFIX = f"openai_vector_stores:{VERSION}::" OPENAI_VECTOR_STORES_FILES_PREFIX = f"openai_vector_stores_files:{VERSION}::" OPENAI_VECTOR_STORES_FILES_CONTENTS_PREFIX = f"openai_vector_stores_files_contents:{VERSION}::" @@ -321,12 +321,12 @@ class OpenAIVectorStoreMixin(ABC): pass @abstractmethod - async def register_vector_db(self, vector_db: VectorDB) -> None: + async def register_vector_store(self, vector_store: VectorStore) -> None: """Register a vector database (provider-specific implementation).""" pass @abstractmethod - async def unregister_vector_db(self, vector_db_id: str) -> None: + async def unregister_vector_store(self, vector_store_id: str) -> None: """Unregister a vector database (provider-specific implementation).""" pass @@ -358,7 +358,7 @@ class OpenAIVectorStoreMixin(ABC): extra_body = params.model_extra or {} metadata = params.metadata or {} - provider_vector_db_id = extra_body.get("provider_vector_db_id") + provider_vector_store_id = extra_body.get("provider_vector_store_id") # Use embedding info from metadata if available, otherwise from extra_body if metadata.get("embedding_model"): @@ -389,8 +389,8 @@ class OpenAIVectorStoreMixin(ABC): # use provider_id set by router; fallback to provider's own ID when used directly via --stack-config provider_id = extra_body.get("provider_id") or getattr(self, "__provider_id__", None) - # Derive the canonical vector_db_id (allow override, else generate) - vector_db_id = provider_vector_db_id or generate_object_id("vector_store", lambda: f"vs_{uuid.uuid4()}") + # Derive the canonical vector_store_id (allow override, else generate) + vector_store_id = provider_vector_store_id or generate_object_id("vector_store", lambda: f"vs_{uuid.uuid4()}") if embedding_model is None: raise ValueError("embedding_model is required") @@ -398,19 +398,20 @@ class OpenAIVectorStoreMixin(ABC): if embedding_dimension is None: raise ValueError("Embedding dimension is required") - # Register the VectorDB backing this vector store + # Register the VectorStore backing this vector store if provider_id is None: raise ValueError("Provider ID is required but was not provided") - vector_db = VectorDB( - identifier=vector_db_id, + # call to the provider to create any index, etc. 
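+ # (the constructor below only builds the VectorStore resource; the provider-specific index creation happens in register_vector_store() further down)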
+ vector_store = VectorStore( + identifier=vector_store_id, embedding_dimension=embedding_dimension, embedding_model=embedding_model, provider_id=provider_id, - provider_resource_id=vector_db_id, - vector_db_name=params.name, + provider_resource_id=vector_store_id, + vector_store_name=params.name, ) - await self.register_vector_db(vector_db) + await self.register_vector_store(vector_store) # Create OpenAI vector store metadata status = "completed" @@ -424,7 +425,7 @@ class OpenAIVectorStoreMixin(ABC): total=0, ) store_info: dict[str, Any] = { - "id": vector_db_id, + "id": vector_store_id, "object": "vector_store", "created_at": created_at, "name": params.name, @@ -441,23 +442,23 @@ class OpenAIVectorStoreMixin(ABC): # Add provider information to metadata if provided if provider_id: metadata["provider_id"] = provider_id - if provider_vector_db_id: - metadata["provider_vector_db_id"] = provider_vector_db_id + if provider_vector_store_id: + metadata["provider_vector_store_id"] = provider_vector_store_id store_info["metadata"] = metadata # Save to persistent storage (provider-specific) - await self._save_openai_vector_store(vector_db_id, store_info) + await self._save_openai_vector_store(vector_store_id, store_info) # Store in memory cache - self.openai_vector_stores[vector_db_id] = store_info + self.openai_vector_stores[vector_store_id] = store_info # Now that our vector store is created, attach any files that were provided file_ids = params.file_ids or [] - tasks = [self.openai_attach_file_to_vector_store(vector_db_id, file_id) for file_id in file_ids] + tasks = [self.openai_attach_file_to_vector_store(vector_store_id, file_id) for file_id in file_ids] await asyncio.gather(*tasks) # Get the updated store info and return it - store_info = self.openai_vector_stores[vector_db_id] + store_info = self.openai_vector_stores[vector_store_id] return VectorStoreObject.model_validate(store_info) async def openai_list_vector_stores( @@ -567,7 +568,7 @@ class OpenAIVectorStoreMixin(ABC): # Also delete the underlying vector DB try: - await self.unregister_vector_db(vector_store_id) + await self.unregister_vector_store(vector_store_id) except Exception as e: logger.warning(f"Failed to delete underlying vector DB {vector_store_id}: {e}") diff --git a/llama_stack/providers/utils/memory/vector_store.py b/llama_stack/providers/utils/memory/vector_store.py index 0375ecaaa..9e9c9a08a 100644 --- a/llama_stack/providers/utils/memory/vector_store.py +++ b/llama_stack/providers/utils/memory/vector_store.py @@ -12,19 +12,16 @@ from dataclasses import dataclass from typing import Any from urllib.parse import unquote -import httpx import numpy as np from numpy.typing import NDArray from pydantic import BaseModel from llama_stack.apis.common.content_types import ( - URL, InterleavedContent, ) from llama_stack.apis.inference import OpenAIEmbeddingsRequestWithExtraBody -from llama_stack.apis.tools import RAGDocument -from llama_stack.apis.vector_dbs import VectorDB from llama_stack.apis.vector_io import Chunk, ChunkMetadata, QueryChunksResponse +from llama_stack.apis.vector_stores import VectorStore from llama_stack.log import get_logger from llama_stack.models.llama.llama3.tokenizer import Tokenizer from llama_stack.providers.datatypes import Api @@ -129,31 +126,6 @@ def content_from_data_and_mime_type(data: bytes | str, mime_type: str | None, en return "" -async def content_from_doc(doc: RAGDocument) -> str: - if isinstance(doc.content, URL): - if doc.content.uri.startswith("data:"): - return 
content_from_data(doc.content.uri) - async with httpx.AsyncClient() as client: - r = await client.get(doc.content.uri) - if doc.mime_type == "application/pdf": - return parse_pdf(r.content) - return r.text - elif isinstance(doc.content, str): - pattern = re.compile("^(https?://|file://|data:)") - if pattern.match(doc.content): - if doc.content.startswith("data:"): - return content_from_data(doc.content) - async with httpx.AsyncClient() as client: - r = await client.get(doc.content) - if doc.mime_type == "application/pdf": - return parse_pdf(r.content) - return r.text - return doc.content - else: - # will raise ValueError if the content is not List[InterleavedContent] or InterleavedContent - return interleaved_content_as_str(doc.content) - - def make_overlapped_chunks( document_id: str, text: str, window_len: int, overlap_len: int, metadata: dict[str, Any] ) -> list[Chunk]: @@ -187,7 +159,7 @@ def make_overlapped_chunks( updated_timestamp=int(time.time()), chunk_window=chunk_window, chunk_tokenizer=default_tokenizer, - chunk_embedding_model=None, # This will be set in `VectorDBWithIndex.insert_chunks` + chunk_embedding_model=None, # This will be set in `VectorStoreWithIndex.insert_chunks` content_token_count=len(toks), metadata_token_count=len(metadata_tokens), ) @@ -255,8 +227,8 @@ class EmbeddingIndex(ABC): @dataclass -class VectorDBWithIndex: - vector_db: VectorDB +class VectorStoreWithIndex: + vector_store: VectorStore index: EmbeddingIndex inference_api: Api.inference @@ -269,14 +241,14 @@ class VectorDBWithIndex: if c.embedding is None: chunks_to_embed.append(c) if c.chunk_metadata: - c.chunk_metadata.chunk_embedding_model = self.vector_db.embedding_model - c.chunk_metadata.chunk_embedding_dimension = self.vector_db.embedding_dimension + c.chunk_metadata.chunk_embedding_model = self.vector_store.embedding_model + c.chunk_metadata.chunk_embedding_dimension = self.vector_store.embedding_dimension else: - _validate_embedding(c.embedding, i, self.vector_db.embedding_dimension) + _validate_embedding(c.embedding, i, self.vector_store.embedding_dimension) if chunks_to_embed: params = OpenAIEmbeddingsRequestWithExtraBody( - model=self.vector_db.embedding_model, + model=self.vector_store.embedding_model, input=[c.content for c in chunks_to_embed], ) resp = await self.inference_api.openai_embeddings(params) @@ -319,7 +291,7 @@ class VectorDBWithIndex: return await self.index.query_keyword(query_string, k, score_threshold) params = OpenAIEmbeddingsRequestWithExtraBody( - model=self.vector_db.embedding_model, + model=self.vector_store.embedding_model, input=[query_string], ) embeddings_response = await self.inference_api.openai_embeddings(params) diff --git a/llama_stack/ui/package-lock.json b/llama_stack/ui/package-lock.json index c138de535..9b22dd8d5 100644 --- a/llama_stack/ui/package-lock.json +++ b/llama_stack/ui/package-lock.json @@ -2072,9 +2072,9 @@ } }, "node_modules/@jridgewell/sourcemap-codec": { - "version": "1.5.0", - "resolved": "https://registry.npmjs.org/@jridgewell/sourcemap-codec/-/sourcemap-codec-1.5.0.tgz", - "integrity": "sha512-gv3ZRaISU3fjPAgNsriBRqGWQL6quFx04YMPW/zD8XMLsU32mhCCbfbO6KZFLjvYpCZ8zyDEgqsgf+PwPaM7GQ==", + "version": "1.5.5", + "resolved": "https://registry.npmjs.org/@jridgewell/sourcemap-codec/-/sourcemap-codec-1.5.5.tgz", + "integrity": "sha512-cYQ9310grqxueWbl+WuIUIaiUaDcj7WOq5fVhEljNVgRfOUhY9fy2zTvfoqWsnebh8Sl70VScFbICvJnLKB0Og==", "dev": true, "license": "MIT" }, @@ -3199,61 +3199,54 @@ } }, "node_modules/@tailwindcss/node": { - "version": "4.1.6", - 
"resolved": "https://registry.npmjs.org/@tailwindcss/node/-/node-4.1.6.tgz", - "integrity": "sha512-ed6zQbgmKsjsVvodAS1q1Ld2BolEuxJOSyyNc+vhkjdmfNUDCmQnlXBfQkHrlzNmslxHsQU/bFmzcEbv4xXsLg==", + "version": "4.1.14", + "resolved": "https://registry.npmjs.org/@tailwindcss/node/-/node-4.1.14.tgz", + "integrity": "sha512-hpz+8vFk3Ic2xssIA3e01R6jkmsAhvkQdXlEbRTk6S10xDAtiQiM3FyvZVGsucefq764euO/b8WUW9ysLdThHw==", "dev": true, "license": "MIT", "dependencies": { - "@ampproject/remapping": "^2.3.0", - "enhanced-resolve": "^5.18.1", - "jiti": "^2.4.2", - "lightningcss": "1.29.2", - "magic-string": "^0.30.17", + "@jridgewell/remapping": "^2.3.4", + "enhanced-resolve": "^5.18.3", + "jiti": "^2.6.0", + "lightningcss": "1.30.1", + "magic-string": "^0.30.19", "source-map-js": "^1.2.1", - "tailwindcss": "4.1.6" + "tailwindcss": "4.1.14" } }, - "node_modules/@tailwindcss/node/node_modules/tailwindcss": { - "version": "4.1.6", - "resolved": "https://registry.npmjs.org/tailwindcss/-/tailwindcss-4.1.6.tgz", - "integrity": "sha512-j0cGLTreM6u4OWzBeLBpycK0WIh8w7kSwcUsQZoGLHZ7xDTdM69lN64AgoIEEwFi0tnhs4wSykUa5YWxAzgFYg==", - "dev": true, - "license": "MIT" - }, "node_modules/@tailwindcss/oxide": { - "version": "4.1.6", - "resolved": "https://registry.npmjs.org/@tailwindcss/oxide/-/oxide-4.1.6.tgz", - "integrity": "sha512-0bpEBQiGx+227fW4G0fLQ8vuvyy5rsB1YIYNapTq3aRsJ9taF3f5cCaovDjN5pUGKKzcpMrZst/mhNaKAPOHOA==", + "version": "4.1.14", + "resolved": "https://registry.npmjs.org/@tailwindcss/oxide/-/oxide-4.1.14.tgz", + "integrity": "sha512-23yx+VUbBwCg2x5XWdB8+1lkPajzLmALEfMb51zZUBYaYVPDQvBSD/WYDqiVyBIo2BZFa3yw1Rpy3G2Jp+K0dw==", "dev": true, "hasInstallScript": true, "license": "MIT", "dependencies": { "detect-libc": "^2.0.4", - "tar": "^7.4.3" + "tar": "^7.5.1" }, "engines": { "node": ">= 10" }, "optionalDependencies": { - "@tailwindcss/oxide-android-arm64": "4.1.6", - "@tailwindcss/oxide-darwin-arm64": "4.1.6", - "@tailwindcss/oxide-darwin-x64": "4.1.6", - "@tailwindcss/oxide-freebsd-x64": "4.1.6", - "@tailwindcss/oxide-linux-arm-gnueabihf": "4.1.6", - "@tailwindcss/oxide-linux-arm64-gnu": "4.1.6", - "@tailwindcss/oxide-linux-arm64-musl": "4.1.6", - "@tailwindcss/oxide-linux-x64-gnu": "4.1.6", - "@tailwindcss/oxide-linux-x64-musl": "4.1.6", - "@tailwindcss/oxide-wasm32-wasi": "4.1.6", - "@tailwindcss/oxide-win32-arm64-msvc": "4.1.6", - "@tailwindcss/oxide-win32-x64-msvc": "4.1.6" + "@tailwindcss/oxide-android-arm64": "4.1.14", + "@tailwindcss/oxide-darwin-arm64": "4.1.14", + "@tailwindcss/oxide-darwin-x64": "4.1.14", + "@tailwindcss/oxide-freebsd-x64": "4.1.14", + "@tailwindcss/oxide-linux-arm-gnueabihf": "4.1.14", + "@tailwindcss/oxide-linux-arm64-gnu": "4.1.14", + "@tailwindcss/oxide-linux-arm64-musl": "4.1.14", + "@tailwindcss/oxide-linux-x64-gnu": "4.1.14", + "@tailwindcss/oxide-linux-x64-musl": "4.1.14", + "@tailwindcss/oxide-wasm32-wasi": "4.1.14", + "@tailwindcss/oxide-win32-arm64-msvc": "4.1.14", + "@tailwindcss/oxide-win32-x64-msvc": "4.1.14" } }, "node_modules/@tailwindcss/oxide-android-arm64": { - "version": "4.1.6", - "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-android-arm64/-/oxide-android-arm64-4.1.6.tgz", - "integrity": "sha512-VHwwPiwXtdIvOvqT/0/FLH/pizTVu78FOnI9jQo64kSAikFSZT7K4pjyzoDpSMaveJTGyAKvDjuhxJxKfmvjiQ==", + "version": "4.1.14", + "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-android-arm64/-/oxide-android-arm64-4.1.14.tgz", + "integrity": "sha512-a94ifZrGwMvbdeAxWoSuGcIl6/DOP5cdxagid7xJv6bwFp3oebp7y2ImYsnZBMTwjn5Ev5xESvS3FFYUGgPODQ==", "cpu": [ "arm64" ], @@ 
-3268,9 +3261,9 @@ } }, "node_modules/@tailwindcss/oxide-darwin-arm64": { - "version": "4.1.6", - "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-darwin-arm64/-/oxide-darwin-arm64-4.1.6.tgz", - "integrity": "sha512-weINOCcqv1HVBIGptNrk7c6lWgSFFiQMcCpKM4tnVi5x8OY2v1FrV76jwLukfT6pL1hyajc06tyVmZFYXoxvhQ==", + "version": "4.1.14", + "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-darwin-arm64/-/oxide-darwin-arm64-4.1.14.tgz", + "integrity": "sha512-HkFP/CqfSh09xCnrPJA7jud7hij5ahKyWomrC3oiO2U9i0UjP17o9pJbxUN0IJ471GTQQmzwhp0DEcpbp4MZTA==", "cpu": [ "arm64" ], @@ -3285,9 +3278,9 @@ } }, "node_modules/@tailwindcss/oxide-darwin-x64": { - "version": "4.1.6", - "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-darwin-x64/-/oxide-darwin-x64-4.1.6.tgz", - "integrity": "sha512-3FzekhHG0ww1zQjQ1lPoq0wPrAIVXAbUkWdWM8u5BnYFZgb9ja5ejBqyTgjpo5mfy0hFOoMnMuVDI+7CXhXZaQ==", + "version": "4.1.14", + "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-darwin-x64/-/oxide-darwin-x64-4.1.14.tgz", + "integrity": "sha512-eVNaWmCgdLf5iv6Qd3s7JI5SEFBFRtfm6W0mphJYXgvnDEAZ5sZzqmI06bK6xo0IErDHdTA5/t7d4eTfWbWOFw==", "cpu": [ "x64" ], @@ -3302,9 +3295,9 @@ } }, "node_modules/@tailwindcss/oxide-freebsd-x64": { - "version": "4.1.6", - "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-freebsd-x64/-/oxide-freebsd-x64-4.1.6.tgz", - "integrity": "sha512-4m5F5lpkBZhVQJq53oe5XgJ+aFYWdrgkMwViHjRsES3KEu2m1udR21B1I77RUqie0ZYNscFzY1v9aDssMBZ/1w==", + "version": "4.1.14", + "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-freebsd-x64/-/oxide-freebsd-x64-4.1.14.tgz", + "integrity": "sha512-QWLoRXNikEuqtNb0dhQN6wsSVVjX6dmUFzuuiL09ZeXju25dsei2uIPl71y2Ic6QbNBsB4scwBoFnlBfabHkEw==", "cpu": [ "x64" ], @@ -3319,9 +3312,9 @@ } }, "node_modules/@tailwindcss/oxide-linux-arm-gnueabihf": { - "version": "4.1.6", - "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-linux-arm-gnueabihf/-/oxide-linux-arm-gnueabihf-4.1.6.tgz", - "integrity": "sha512-qU0rHnA9P/ZoaDKouU1oGPxPWzDKtIfX7eOGi5jOWJKdxieUJdVV+CxWZOpDWlYTd4N3sFQvcnVLJWJ1cLP5TA==", + "version": "4.1.14", + "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-linux-arm-gnueabihf/-/oxide-linux-arm-gnueabihf-4.1.14.tgz", + "integrity": "sha512-VB4gjQni9+F0VCASU+L8zSIyjrLLsy03sjcR3bM0V2g4SNamo0FakZFKyUQ96ZVwGK4CaJsc9zd/obQy74o0Fw==", "cpu": [ "arm" ], @@ -3336,9 +3329,9 @@ } }, "node_modules/@tailwindcss/oxide-linux-arm64-gnu": { - "version": "4.1.6", - "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-linux-arm64-gnu/-/oxide-linux-arm64-gnu-4.1.6.tgz", - "integrity": "sha512-jXy3TSTrbfgyd3UxPQeXC3wm8DAgmigzar99Km9Sf6L2OFfn/k+u3VqmpgHQw5QNfCpPe43em6Q7V76Wx7ogIQ==", + "version": "4.1.14", + "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-linux-arm64-gnu/-/oxide-linux-arm64-gnu-4.1.14.tgz", + "integrity": "sha512-qaEy0dIZ6d9vyLnmeg24yzA8XuEAD9WjpM5nIM1sUgQ/Zv7cVkharPDQcmm/t/TvXoKo/0knI3me3AGfdx6w1w==", "cpu": [ "arm64" ], @@ -3353,9 +3346,9 @@ } }, "node_modules/@tailwindcss/oxide-linux-arm64-musl": { - "version": "4.1.6", - "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-linux-arm64-musl/-/oxide-linux-arm64-musl-4.1.6.tgz", - "integrity": "sha512-8kjivE5xW0qAQ9HX9reVFmZj3t+VmljDLVRJpVBEoTR+3bKMnvC7iLcoSGNIUJGOZy1mLVq7x/gerVg0T+IsYw==", + "version": "4.1.14", + "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-linux-arm64-musl/-/oxide-linux-arm64-musl-4.1.14.tgz", + "integrity": "sha512-ISZjT44s59O8xKsPEIesiIydMG/sCXoMBCqsphDm/WcbnuWLxxb+GcvSIIA5NjUw6F8Tex7s5/LM2yDy8RqYBQ==", "cpu": [ "arm64" 
], @@ -3370,9 +3363,9 @@ } }, "node_modules/@tailwindcss/oxide-linux-x64-gnu": { - "version": "4.1.6", - "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-linux-x64-gnu/-/oxide-linux-x64-gnu-4.1.6.tgz", - "integrity": "sha512-A4spQhwnWVpjWDLXnOW9PSinO2PTKJQNRmL/aIl2U/O+RARls8doDfs6R41+DAXK0ccacvRyDpR46aVQJJCoCg==", + "version": "4.1.14", + "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-linux-x64-gnu/-/oxide-linux-x64-gnu-4.1.14.tgz", + "integrity": "sha512-02c6JhLPJj10L2caH4U0zF8Hji4dOeahmuMl23stk0MU1wfd1OraE7rOloidSF8W5JTHkFdVo/O7uRUJJnUAJg==", "cpu": [ "x64" ], @@ -3387,9 +3380,9 @@ } }, "node_modules/@tailwindcss/oxide-linux-x64-musl": { - "version": "4.1.6", - "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-linux-x64-musl/-/oxide-linux-x64-musl-4.1.6.tgz", - "integrity": "sha512-YRee+6ZqdzgiQAHVSLfl3RYmqeeaWVCk796MhXhLQu2kJu2COHBkqlqsqKYx3p8Hmk5pGCQd2jTAoMWWFeyG2A==", + "version": "4.1.14", + "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-linux-x64-musl/-/oxide-linux-x64-musl-4.1.14.tgz", + "integrity": "sha512-TNGeLiN1XS66kQhxHG/7wMeQDOoL0S33x9BgmydbrWAb9Qw0KYdd8o1ifx4HOGDWhVmJ+Ul+JQ7lyknQFilO3Q==", "cpu": [ "x64" ], @@ -3404,9 +3397,9 @@ } }, "node_modules/@tailwindcss/oxide-wasm32-wasi": { - "version": "4.1.6", - "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-wasm32-wasi/-/oxide-wasm32-wasi-4.1.6.tgz", - "integrity": "sha512-qAp4ooTYrBQ5pk5jgg54/U1rCJ/9FLYOkkQ/nTE+bVMseMfB6O7J8zb19YTpWuu4UdfRf5zzOrNKfl6T64MNrQ==", + "version": "4.1.14", + "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-wasm32-wasi/-/oxide-wasm32-wasi-4.1.14.tgz", + "integrity": "sha512-uZYAsaW/jS/IYkd6EWPJKW/NlPNSkWkBlaeVBi/WsFQNP05/bzkebUL8FH1pdsqx4f2fH/bWFcUABOM9nfiJkQ==", "bundleDependencies": [ "@napi-rs/wasm-runtime", "@emnapi/core", @@ -3422,21 +3415,81 @@ "license": "MIT", "optional": true, "dependencies": { - "@emnapi/core": "^1.4.3", - "@emnapi/runtime": "^1.4.3", - "@emnapi/wasi-threads": "^1.0.2", - "@napi-rs/wasm-runtime": "^0.2.9", - "@tybys/wasm-util": "^0.9.0", - "tslib": "^2.8.0" + "@emnapi/core": "^1.5.0", + "@emnapi/runtime": "^1.5.0", + "@emnapi/wasi-threads": "^1.1.0", + "@napi-rs/wasm-runtime": "^1.0.5", + "@tybys/wasm-util": "^0.10.1", + "tslib": "^2.4.0" }, "engines": { "node": ">=14.0.0" } }, + "node_modules/@tailwindcss/oxide-wasm32-wasi/node_modules/@emnapi/core": { + "version": "1.5.0", + "dev": true, + "inBundle": true, + "license": "MIT", + "optional": true, + "dependencies": { + "@emnapi/wasi-threads": "1.1.0", + "tslib": "^2.4.0" + } + }, + "node_modules/@tailwindcss/oxide-wasm32-wasi/node_modules/@emnapi/runtime": { + "version": "1.5.0", + "dev": true, + "inBundle": true, + "license": "MIT", + "optional": true, + "dependencies": { + "tslib": "^2.4.0" + } + }, + "node_modules/@tailwindcss/oxide-wasm32-wasi/node_modules/@emnapi/wasi-threads": { + "version": "1.1.0", + "dev": true, + "inBundle": true, + "license": "MIT", + "optional": true, + "dependencies": { + "tslib": "^2.4.0" + } + }, + "node_modules/@tailwindcss/oxide-wasm32-wasi/node_modules/@napi-rs/wasm-runtime": { + "version": "1.0.5", + "dev": true, + "inBundle": true, + "license": "MIT", + "optional": true, + "dependencies": { + "@emnapi/core": "^1.5.0", + "@emnapi/runtime": "^1.5.0", + "@tybys/wasm-util": "^0.10.1" + } + }, + "node_modules/@tailwindcss/oxide-wasm32-wasi/node_modules/@tybys/wasm-util": { + "version": "0.10.1", + "dev": true, + "inBundle": true, + "license": "MIT", + "optional": true, + "dependencies": { + "tslib": "^2.4.0" + } + }, + 
"node_modules/@tailwindcss/oxide-wasm32-wasi/node_modules/tslib": { + "version": "2.8.1", + "dev": true, + "inBundle": true, + "license": "0BSD", + "optional": true + }, "node_modules/@tailwindcss/oxide-win32-arm64-msvc": { - "version": "4.1.6", - "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-win32-arm64-msvc/-/oxide-win32-arm64-msvc-4.1.6.tgz", - "integrity": "sha512-nqpDWk0Xr8ELO/nfRUDjk1pc9wDJ3ObeDdNMHLaymc4PJBWj11gdPCWZFKSK2AVKjJQC7J2EfmSmf47GN7OuLg==", + "version": "4.1.14", + "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-win32-arm64-msvc/-/oxide-win32-arm64-msvc-4.1.14.tgz", + "integrity": "sha512-Az0RnnkcvRqsuoLH2Z4n3JfAef0wElgzHD5Aky/e+0tBUxUhIeIqFBTMNQvmMRSP15fWwmvjBxZ3Q8RhsDnxAA==", "cpu": [ "arm64" ], @@ -3451,9 +3504,9 @@ } }, "node_modules/@tailwindcss/oxide-win32-x64-msvc": { - "version": "4.1.6", - "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-win32-x64-msvc/-/oxide-win32-x64-msvc-4.1.6.tgz", - "integrity": "sha512-5k9xF33xkfKpo9wCvYcegQ21VwIBU1/qEbYlVukfEIyQbEA47uK8AAwS7NVjNE3vHzcmxMYwd0l6L4pPjjm1rQ==", + "version": "4.1.14", + "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-win32-x64-msvc/-/oxide-win32-x64-msvc-4.1.14.tgz", + "integrity": "sha512-ttblVGHgf68kEE4om1n/n44I0yGPkCPbLsqzjvybhpwa6mKKtgFfAzy6btc3HRmuW7nHe0OOrSeNP9sQmmH9XA==", "cpu": [ "x64" ], @@ -3468,26 +3521,19 @@ } }, "node_modules/@tailwindcss/postcss": { - "version": "4.1.6", - "resolved": "https://registry.npmjs.org/@tailwindcss/postcss/-/postcss-4.1.6.tgz", - "integrity": "sha512-ELq+gDMBuRXPJlpE3PEen+1MhnHAQQrh2zF0dI1NXOlEWfr2qWf2CQdr5jl9yANv8RErQaQ2l6nIFO9OSCVq/g==", + "version": "4.1.14", + "resolved": "https://registry.npmjs.org/@tailwindcss/postcss/-/postcss-4.1.14.tgz", + "integrity": "sha512-BdMjIxy7HUNThK87C7BC8I1rE8BVUsfNQSI5siQ4JK3iIa3w0XyVvVL9SXLWO//CtYTcp1v7zci0fYwJOjB+Zg==", "dev": true, "license": "MIT", "dependencies": { "@alloc/quick-lru": "^5.2.0", - "@tailwindcss/node": "4.1.6", - "@tailwindcss/oxide": "4.1.6", + "@tailwindcss/node": "4.1.14", + "@tailwindcss/oxide": "4.1.14", "postcss": "^8.4.41", - "tailwindcss": "4.1.6" + "tailwindcss": "4.1.14" } }, - "node_modules/@tailwindcss/postcss/node_modules/tailwindcss": { - "version": "4.1.6", - "resolved": "https://registry.npmjs.org/tailwindcss/-/tailwindcss-4.1.6.tgz", - "integrity": "sha512-j0cGLTreM6u4OWzBeLBpycK0WIh8w7kSwcUsQZoGLHZ7xDTdM69lN64AgoIEEwFi0tnhs4wSykUa5YWxAzgFYg==", - "dev": true, - "license": "MIT" - }, "node_modules/@testing-library/dom": { "version": "10.4.1", "resolved": "https://registry.npmjs.org/@testing-library/dom/-/dom-10.4.1.tgz", @@ -3812,12 +3858,12 @@ "license": "MIT" }, "node_modules/@types/node": { - "version": "24.3.0", - "resolved": "https://registry.npmjs.org/@types/node/-/node-24.3.0.tgz", - "integrity": "sha512-aPTXCrfwnDLj4VvXrm+UUCQjNEvJgNA8s5F1cvwQU+3KNltTOkBm1j30uNLyqqPNe7gE3KFzImYoZEfLhp4Yow==", + "version": "24.8.1", + "resolved": "https://registry.npmjs.org/@types/node/-/node-24.8.1.tgz", + "integrity": "sha512-alv65KGRadQVfVcG69MuB4IzdYVpRwMG/mq8KWOaoOdyY617P5ivaDiMCGOFDWD2sAn5Q0mR3mRtUOgm99hL9Q==", "license": "MIT", "dependencies": { - "undici-types": "~7.10.0" + "undici-types": "~7.14.0" } }, "node_modules/@types/node-fetch": { @@ -5850,9 +5896,9 @@ "license": "MIT" }, "node_modules/enhanced-resolve": { - "version": "5.18.1", - "resolved": "https://registry.npmjs.org/enhanced-resolve/-/enhanced-resolve-5.18.1.tgz", - "integrity": "sha512-ZSW3ma5GkcQBIpwZTSRAI8N71Uuwgs93IezB7mf7R60tC8ZbJideoDNKjHn2O9KIlx6rkGTTEk1xUCK2E1Y2Yg==", + "version": 
"5.18.3", + "resolved": "https://registry.npmjs.org/enhanced-resolve/-/enhanced-resolve-5.18.3.tgz", + "integrity": "sha512-d4lC8xfavMeBjzGr2vECC3fsGXziXZQyJxD868h2M/mBI3PwAuODxAkLkq5HYuvrPYcUtiLzsTo8U3PgX3Ocww==", "dev": true, "license": "MIT", "dependencies": { @@ -9128,9 +9174,9 @@ } }, "node_modules/jiti": { - "version": "2.4.2", - "resolved": "https://registry.npmjs.org/jiti/-/jiti-2.4.2.tgz", - "integrity": "sha512-rg9zJN+G4n2nfJl5MW3BMygZX56zKPNVEYYqq7adpmMh4Jn2QNEwhvQlFy6jPVdcod7txZtKHWnyZiA3a0zP7A==", + "version": "2.6.1", + "resolved": "https://registry.npmjs.org/jiti/-/jiti-2.6.1.tgz", + "integrity": "sha512-ekilCSN1jwRvIbgeg/57YFh8qQDNbwDb9xT/qu2DAHbFFZUicIl4ygVaAvzveMhMVr3LnpSKTNnwt8PoOfmKhQ==", "dev": true, "license": "MIT", "bin": { @@ -9368,9 +9414,9 @@ } }, "node_modules/lightningcss": { - "version": "1.29.2", - "resolved": "https://registry.npmjs.org/lightningcss/-/lightningcss-1.29.2.tgz", - "integrity": "sha512-6b6gd/RUXKaw5keVdSEtqFVdzWnU5jMxTUjA2bVcMNPLwSQ08Sv/UodBVtETLCn7k4S1Ibxwh7k68IwLZPgKaA==", + "version": "1.30.1", + "resolved": "https://registry.npmjs.org/lightningcss/-/lightningcss-1.30.1.tgz", + "integrity": "sha512-xi6IyHML+c9+Q3W0S4fCQJOym42pyurFiJUHEcEyHS0CeKzia4yZDEsLlqOFykxOdHpNy0NmvVO31vcSqAxJCg==", "dev": true, "license": "MPL-2.0", "dependencies": { @@ -9384,22 +9430,22 @@ "url": "https://opencollective.com/parcel" }, "optionalDependencies": { - "lightningcss-darwin-arm64": "1.29.2", - "lightningcss-darwin-x64": "1.29.2", - "lightningcss-freebsd-x64": "1.29.2", - "lightningcss-linux-arm-gnueabihf": "1.29.2", - "lightningcss-linux-arm64-gnu": "1.29.2", - "lightningcss-linux-arm64-musl": "1.29.2", - "lightningcss-linux-x64-gnu": "1.29.2", - "lightningcss-linux-x64-musl": "1.29.2", - "lightningcss-win32-arm64-msvc": "1.29.2", - "lightningcss-win32-x64-msvc": "1.29.2" + "lightningcss-darwin-arm64": "1.30.1", + "lightningcss-darwin-x64": "1.30.1", + "lightningcss-freebsd-x64": "1.30.1", + "lightningcss-linux-arm-gnueabihf": "1.30.1", + "lightningcss-linux-arm64-gnu": "1.30.1", + "lightningcss-linux-arm64-musl": "1.30.1", + "lightningcss-linux-x64-gnu": "1.30.1", + "lightningcss-linux-x64-musl": "1.30.1", + "lightningcss-win32-arm64-msvc": "1.30.1", + "lightningcss-win32-x64-msvc": "1.30.1" } }, "node_modules/lightningcss-darwin-arm64": { - "version": "1.29.2", - "resolved": "https://registry.npmjs.org/lightningcss-darwin-arm64/-/lightningcss-darwin-arm64-1.29.2.tgz", - "integrity": "sha512-cK/eMabSViKn/PG8U/a7aCorpeKLMlK0bQeNHmdb7qUnBkNPnL+oV5DjJUo0kqWsJUapZsM4jCfYItbqBDvlcA==", + "version": "1.30.1", + "resolved": "https://registry.npmjs.org/lightningcss-darwin-arm64/-/lightningcss-darwin-arm64-1.30.1.tgz", + "integrity": "sha512-c8JK7hyE65X1MHMN+Viq9n11RRC7hgin3HhYKhrMyaXflk5GVplZ60IxyoVtzILeKr+xAJwg6zK6sjTBJ0FKYQ==", "cpu": [ "arm64" ], @@ -9418,9 +9464,9 @@ } }, "node_modules/lightningcss-darwin-x64": { - "version": "1.29.2", - "resolved": "https://registry.npmjs.org/lightningcss-darwin-x64/-/lightningcss-darwin-x64-1.29.2.tgz", - "integrity": "sha512-j5qYxamyQw4kDXX5hnnCKMf3mLlHvG44f24Qyi2965/Ycz829MYqjrVg2H8BidybHBp9kom4D7DR5VqCKDXS0w==", + "version": "1.30.1", + "resolved": "https://registry.npmjs.org/lightningcss-darwin-x64/-/lightningcss-darwin-x64-1.30.1.tgz", + "integrity": "sha512-k1EvjakfumAQoTfcXUcHQZhSpLlkAuEkdMBsI/ivWw9hL+7FtilQc0Cy3hrx0AAQrVtQAbMI7YjCgYgvn37PzA==", "cpu": [ "x64" ], @@ -9439,9 +9485,9 @@ } }, "node_modules/lightningcss-freebsd-x64": { - "version": "1.29.2", - "resolved": 
"https://registry.npmjs.org/lightningcss-freebsd-x64/-/lightningcss-freebsd-x64-1.29.2.tgz", - "integrity": "sha512-wDk7M2tM78Ii8ek9YjnY8MjV5f5JN2qNVO+/0BAGZRvXKtQrBC4/cn4ssQIpKIPP44YXw6gFdpUF+Ps+RGsCwg==", + "version": "1.30.1", + "resolved": "https://registry.npmjs.org/lightningcss-freebsd-x64/-/lightningcss-freebsd-x64-1.30.1.tgz", + "integrity": "sha512-kmW6UGCGg2PcyUE59K5r0kWfKPAVy4SltVeut+umLCFoJ53RdCUWxcRDzO1eTaxf/7Q2H7LTquFHPL5R+Gjyig==", "cpu": [ "x64" ], @@ -9460,9 +9506,9 @@ } }, "node_modules/lightningcss-linux-arm-gnueabihf": { - "version": "1.29.2", - "resolved": "https://registry.npmjs.org/lightningcss-linux-arm-gnueabihf/-/lightningcss-linux-arm-gnueabihf-1.29.2.tgz", - "integrity": "sha512-IRUrOrAF2Z+KExdExe3Rz7NSTuuJ2HvCGlMKoquK5pjvo2JY4Rybr+NrKnq0U0hZnx5AnGsuFHjGnNT14w26sg==", + "version": "1.30.1", + "resolved": "https://registry.npmjs.org/lightningcss-linux-arm-gnueabihf/-/lightningcss-linux-arm-gnueabihf-1.30.1.tgz", + "integrity": "sha512-MjxUShl1v8pit+6D/zSPq9S9dQ2NPFSQwGvxBCYaBYLPlCWuPh9/t1MRS8iUaR8i+a6w7aps+B4N0S1TYP/R+Q==", "cpu": [ "arm" ], @@ -9481,9 +9527,9 @@ } }, "node_modules/lightningcss-linux-arm64-gnu": { - "version": "1.29.2", - "resolved": "https://registry.npmjs.org/lightningcss-linux-arm64-gnu/-/lightningcss-linux-arm64-gnu-1.29.2.tgz", - "integrity": "sha512-KKCpOlmhdjvUTX/mBuaKemp0oeDIBBLFiU5Fnqxh1/DZ4JPZi4evEH7TKoSBFOSOV3J7iEmmBaw/8dpiUvRKlQ==", + "version": "1.30.1", + "resolved": "https://registry.npmjs.org/lightningcss-linux-arm64-gnu/-/lightningcss-linux-arm64-gnu-1.30.1.tgz", + "integrity": "sha512-gB72maP8rmrKsnKYy8XUuXi/4OctJiuQjcuqWNlJQ6jZiWqtPvqFziskH3hnajfvKB27ynbVCucKSm2rkQp4Bw==", "cpu": [ "arm64" ], @@ -9502,9 +9548,9 @@ } }, "node_modules/lightningcss-linux-arm64-musl": { - "version": "1.29.2", - "resolved": "https://registry.npmjs.org/lightningcss-linux-arm64-musl/-/lightningcss-linux-arm64-musl-1.29.2.tgz", - "integrity": "sha512-Q64eM1bPlOOUgxFmoPUefqzY1yV3ctFPE6d/Vt7WzLW4rKTv7MyYNky+FWxRpLkNASTnKQUaiMJ87zNODIrrKQ==", + "version": "1.30.1", + "resolved": "https://registry.npmjs.org/lightningcss-linux-arm64-musl/-/lightningcss-linux-arm64-musl-1.30.1.tgz", + "integrity": "sha512-jmUQVx4331m6LIX+0wUhBbmMX7TCfjF5FoOH6SD1CttzuYlGNVpA7QnrmLxrsub43ClTINfGSYyHe2HWeLl5CQ==", "cpu": [ "arm64" ], @@ -9523,9 +9569,9 @@ } }, "node_modules/lightningcss-linux-x64-gnu": { - "version": "1.29.2", - "resolved": "https://registry.npmjs.org/lightningcss-linux-x64-gnu/-/lightningcss-linux-x64-gnu-1.29.2.tgz", - "integrity": "sha512-0v6idDCPG6epLXtBH/RPkHvYx74CVziHo6TMYga8O2EiQApnUPZsbR9nFNrg2cgBzk1AYqEd95TlrsL7nYABQg==", + "version": "1.30.1", + "resolved": "https://registry.npmjs.org/lightningcss-linux-x64-gnu/-/lightningcss-linux-x64-gnu-1.30.1.tgz", + "integrity": "sha512-piWx3z4wN8J8z3+O5kO74+yr6ze/dKmPnI7vLqfSqI8bccaTGY5xiSGVIJBDd5K5BHlvVLpUB3S2YCfelyJ1bw==", "cpu": [ "x64" ], @@ -9544,9 +9590,9 @@ } }, "node_modules/lightningcss-linux-x64-musl": { - "version": "1.29.2", - "resolved": "https://registry.npmjs.org/lightningcss-linux-x64-musl/-/lightningcss-linux-x64-musl-1.29.2.tgz", - "integrity": "sha512-rMpz2yawkgGT8RULc5S4WiZopVMOFWjiItBT7aSfDX4NQav6M44rhn5hjtkKzB+wMTRlLLqxkeYEtQ3dd9696w==", + "version": "1.30.1", + "resolved": "https://registry.npmjs.org/lightningcss-linux-x64-musl/-/lightningcss-linux-x64-musl-1.30.1.tgz", + "integrity": "sha512-rRomAK7eIkL+tHY0YPxbc5Dra2gXlI63HL+v1Pdi1a3sC+tJTcFrHX+E86sulgAXeI7rSzDYhPSeHHjqFhqfeQ==", "cpu": [ "x64" ], @@ -9565,9 +9611,9 @@ } }, "node_modules/lightningcss-win32-arm64-msvc": { - "version": 
"1.29.2", - "resolved": "https://registry.npmjs.org/lightningcss-win32-arm64-msvc/-/lightningcss-win32-arm64-msvc-1.29.2.tgz", - "integrity": "sha512-nL7zRW6evGQqYVu/bKGK+zShyz8OVzsCotFgc7judbt6wnB2KbiKKJwBE4SGoDBQ1O94RjW4asrCjQL4i8Fhbw==", + "version": "1.30.1", + "resolved": "https://registry.npmjs.org/lightningcss-win32-arm64-msvc/-/lightningcss-win32-arm64-msvc-1.30.1.tgz", + "integrity": "sha512-mSL4rqPi4iXq5YVqzSsJgMVFENoa4nGTT/GjO2c0Yl9OuQfPsIfncvLrEW6RbbB24WtZ3xP/2CCmI3tNkNV4oA==", "cpu": [ "arm64" ], @@ -9586,9 +9632,9 @@ } }, "node_modules/lightningcss-win32-x64-msvc": { - "version": "1.29.2", - "resolved": "https://registry.npmjs.org/lightningcss-win32-x64-msvc/-/lightningcss-win32-x64-msvc-1.29.2.tgz", - "integrity": "sha512-EdIUW3B2vLuHmv7urfzMI/h2fmlnOQBk1xlsDxkN1tCWKjNFjfLhGxYk8C8mzpSfr+A6jFFIi8fU6LbQGsRWjA==", + "version": "1.30.1", + "resolved": "https://registry.npmjs.org/lightningcss-win32-x64-msvc/-/lightningcss-win32-x64-msvc-1.30.1.tgz", + "integrity": "sha512-PVqXh48wh4T53F/1CCu8PIPCxLzWyCnn/9T5W1Jpmdy5h9Cwd+0YQS6/LwhHXSafuc61/xg9Lv5OrCby6a++jg==", "cpu": [ "x64" ], @@ -9726,13 +9772,13 @@ } }, "node_modules/magic-string": { - "version": "0.30.17", - "resolved": "https://registry.npmjs.org/magic-string/-/magic-string-0.30.17.tgz", - "integrity": "sha512-sNPKHvyjVf7gyjwS4xGTaW/mCnF8wnjtifKBEhxfZ7E/S8tQ0rssrwGNn6q8JH/ohItJfSQp9mBtQYuTlH5QnA==", + "version": "0.30.19", + "resolved": "https://registry.npmjs.org/magic-string/-/magic-string-0.30.19.tgz", + "integrity": "sha512-2N21sPY9Ws53PZvsEpVtNuSW+ScYbQdp4b9qUaL+9QkHUrGFKo56Lg9Emg5s9V/qrtNBmiR01sYhUOwu3H+VOw==", "dev": true, "license": "MIT", "dependencies": { - "@jridgewell/sourcemap-codec": "^1.5.0" + "@jridgewell/sourcemap-codec": "^1.5.5" } }, "node_modules/make-dir": { @@ -10717,9 +10763,9 @@ } }, "node_modules/minizlib": { - "version": "3.0.2", - "resolved": "https://registry.npmjs.org/minizlib/-/minizlib-3.0.2.tgz", - "integrity": "sha512-oG62iEk+CYt5Xj2YqI5Xi9xWUeZhDI8jjQmC5oThVH5JGCTgIjr7ciJDzC7MBzYd//WvR1OTmP5Q38Q8ShQtVA==", + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/minizlib/-/minizlib-3.1.0.tgz", + "integrity": "sha512-KZxYo1BUkWD2TVFLr0MQoM8vUUigWD3LlD83a/75BqC+4qE0Hb1Vo5v1FgcfaNXvfXzr+5EhQ6ing/CaBijTlw==", "dev": true, "license": "MIT", "dependencies": { @@ -10729,22 +10775,6 @@ "node": ">= 18" } }, - "node_modules/mkdirp": { - "version": "3.0.1", - "resolved": "https://registry.npmjs.org/mkdirp/-/mkdirp-3.0.1.tgz", - "integrity": "sha512-+NsyUUAZDmo6YVHzL/stxSu3t9YS1iljliy3BSDrXJ/dkn1KYdmtZODGGjLcc9XLgVVpH4KshHB8XmZgMhaBXg==", - "dev": true, - "license": "MIT", - "bin": { - "mkdirp": "dist/cjs/src/bin.js" - }, - "engines": { - "node": ">=10" - }, - "funding": { - "url": "https://github.com/sponsors/isaacs" - } - }, "node_modules/motion-dom": { "version": "12.23.23", "resolved": "https://registry.npmjs.org/motion-dom/-/motion-dom-12.23.23.tgz", @@ -12989,34 +13019,37 @@ } }, "node_modules/tailwindcss": { - "version": "4.1.13", - "resolved": "https://registry.npmjs.org/tailwindcss/-/tailwindcss-4.1.13.tgz", - "integrity": "sha512-i+zidfmTqtwquj4hMEwdjshYYgMbOrPzb9a0M3ZgNa0JMoZeFC6bxZvO8yr8ozS6ix2SDz0+mvryPeBs2TFE+w==", + "version": "4.1.14", + "resolved": "https://registry.npmjs.org/tailwindcss/-/tailwindcss-4.1.14.tgz", + "integrity": "sha512-b7pCxjGO98LnxVkKjaZSDeNuljC4ueKUddjENJOADtubtdo8llTaJy7HwBMeLNSSo2N5QIAgklslK1+Ir8r6CA==", "dev": true, "license": "MIT" }, "node_modules/tapable": { - "version": "2.2.1", - "resolved": "https://registry.npmjs.org/tapable/-/tapable-2.2.1.tgz", - 
"integrity": "sha512-GNzQvQTOIP6RyTfE2Qxb8ZVlNmw0n88vp1szwWRimP02mnTsx3Wtn5qRdqY9w2XduFNUgvOwhNnQsjwCp+kqaQ==", + "version": "2.3.0", + "resolved": "https://registry.npmjs.org/tapable/-/tapable-2.3.0.tgz", + "integrity": "sha512-g9ljZiwki/LfxmQADO3dEY1CbpmXT5Hm2fJ+QaGKwSXUylMybePR7/67YW7jOrrvjEgL1Fmz5kzyAjWVWLlucg==", "dev": true, "license": "MIT", "engines": { "node": ">=6" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/webpack" } }, "node_modules/tar": { - "version": "7.4.3", - "resolved": "https://registry.npmjs.org/tar/-/tar-7.4.3.tgz", - "integrity": "sha512-5S7Va8hKfV7W5U6g3aYxXmlPoZVAwUMy9AOKyF2fVuZa2UD3qZjg578OrLRt8PcNN1PleVaL/5/yYATNL0ICUw==", + "version": "7.5.1", + "resolved": "https://registry.npmjs.org/tar/-/tar-7.5.1.tgz", + "integrity": "sha512-nlGpxf+hv0v7GkWBK2V9spgactGOp0qvfWRxUMjqHyzrt3SgwE48DIv/FhqPHJYLHpgW1opq3nERbz5Anq7n1g==", "dev": true, "license": "ISC", "dependencies": { "@isaacs/fs-minipass": "^4.0.0", "chownr": "^3.0.0", "minipass": "^7.1.2", - "minizlib": "^3.0.1", - "mkdirp": "^3.0.1", + "minizlib": "^3.1.0", "yallist": "^5.0.0" }, "engines": { @@ -13418,9 +13451,9 @@ } }, "node_modules/undici-types": { - "version": "7.10.0", - "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-7.10.0.tgz", - "integrity": "sha512-t5Fy/nfn+14LuOc2KNYg75vZqClpAiqscVvMygNnlsHBFpSXdJaYtXMcdNLpl/Qvc3P2cB3s6lOV51nqsFq4ag==", + "version": "7.14.0", + "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-7.14.0.tgz", + "integrity": "sha512-QQiYxHuyZ9gQUIrmPo3IA+hUl4KYk8uSA7cHrcKd/l3p1OTpZcM0Tbp9x7FAtXdAYhlasd60ncPpgu6ihG6TOA==", "license": "MIT" }, "node_modules/unified": { diff --git a/scripts/docker.sh b/scripts/docker.sh index 7a5c3e6e0..a0690c8a9 100755 --- a/scripts/docker.sh +++ b/scripts/docker.sh @@ -156,6 +156,16 @@ DISTRO=$(echo "$DISTRO" | sed 's/^docker://') CONTAINER_NAME="llama-stack-test-$DISTRO" +should_copy_source() { + if [[ "$USE_COPY_NOT_MOUNT" == "true" ]]; then + return 0 + fi + if [[ "${CI:-false}" == "true" ]] || [[ "${GITHUB_ACTIONS:-false}" == "true" ]]; then + return 0 + fi + return 1 +} + # Function to check if container is running is_container_running() { docker ps --filter "name=^${CONTAINER_NAME}$" --format '{{.Names}}' | grep -q "^${CONTAINER_NAME}$" @@ -183,20 +193,29 @@ stop_container() { build_image() { echo "=== Building Docker Image for distribution: $DISTRO ===" # Get the repo root (parent of scripts directory) - SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) - REPO_ROOT=$(cd "$SCRIPT_DIR/.." && pwd) + local script_dir + script_dir=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) + local repo_root + repo_root=$(cd "$script_dir/.." && pwd) - # Determine whether to copy or mount source - # Copy in CI or if explicitly requested, otherwise mount for live development - BUILD_ENV="LLAMA_STACK_DIR=$REPO_ROOT" - if [[ "$USE_COPY_NOT_MOUNT" == "true" ]] || [[ "${CI:-false}" == "true" ]] || [[ "${GITHUB_ACTIONS:-false}" == "true" ]]; then - echo "Copying source into image (USE_COPY_NOT_MOUNT=true, CI=${CI:-false}, GITHUB_ACTIONS=${GITHUB_ACTIONS:-false})" - BUILD_ENV="USE_COPY_NOT_MOUNT=true $BUILD_ENV" - else - echo "Will mount source for live development" + local containerfile="$repo_root/containers/Containerfile" + if [[ ! -f "$containerfile" ]]; then + echo "❌ Containerfile not found at $containerfile" + exit 1 fi - if ! 
eval "$BUILD_ENV llama stack build --distro '$DISTRO' --image-type container"; then + local build_cmd=( + docker + build + "$repo_root" + -f "$containerfile" + --tag "localhost/distribution-$DISTRO:dev" + --build-arg "DISTRO_NAME=$DISTRO" + --build-arg "INSTALL_MODE=editable" + --build-arg "LLAMA_STACK_DIR=/workspace" + ) + + if ! "${build_cmd[@]}"; then echo "❌ Failed to build Docker image" exit 1 fi @@ -224,7 +243,7 @@ start_container() { # Check if image exists (with or without localhost/ prefix) if ! docker images --format "{{.Repository}}:{{.Tag}}" | grep -q "distribution-$DISTRO:dev$"; then echo "❌ Error: Image distribution-$DISTRO:dev does not exist" - echo "Either build it first without --no-rebuild, or run: llama stack build --distro $DISTRO --image-type container" + echo "Either build it first without --no-rebuild, or run: docker build . -f containers/Containerfile --build-arg DISTRO_NAME=$DISTRO --tag localhost/distribution-$DISTRO:dev" exit 1 fi echo "✅ Found existing image for distribution-$DISTRO:dev" @@ -236,8 +255,10 @@ start_container() { echo "=== Starting Docker Container ===" # Get the repo root for volume mount - SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" && pwd) - REPO_ROOT=$(cd "$SCRIPT_DIR/.." && pwd) + local script_dir + script_dir=$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" && pwd) + local repo_root + repo_root=$(cd "$script_dir/.." && pwd) # Determine the actual image name (may have localhost/ prefix) IMAGE_NAME=$(docker images --format "{{.Repository}}:{{.Tag}}" | grep "distribution-$DISTRO:dev$" | head -1) @@ -279,10 +300,18 @@ start_container() { NETWORK_MODE="--network host" fi + local source_mount="" + if should_copy_source; then + echo "Source baked into image (no volume mount)" + else + source_mount="-v \"$repo_root\":/workspace" + echo "Mounting $repo_root into /workspace" + fi + docker run -d $NETWORK_MODE --name "$CONTAINER_NAME" \ -p $PORT:$PORT \ $DOCKER_ENV_VARS \ - -v "$REPO_ROOT":/app/llama-stack-source \ + $source_mount \ "$IMAGE_NAME" \ --port $PORT diff --git a/scripts/integration-tests.sh b/scripts/integration-tests.sh index e19a5cc55..93739052b 100755 --- a/scripts/integration-tests.sh +++ b/scripts/integration-tests.sh @@ -238,6 +238,8 @@ if [[ "$STACK_CONFIG" == *"docker:"* && "$COLLECT_ONLY" == false ]]; then echo "Stopping Docker container..." container_name="llama-stack-test-$DISTRO" if docker ps -a --format '{{.Names}}' | grep -q "^${container_name}$"; then + echo "Dumping container logs before stopping..." + docker logs "$container_name" > "docker-${DISTRO}-${INFERENCE_MODE}.log" 2>&1 || true echo "Stopping and removing container: $container_name" docker stop "$container_name" 2>/dev/null || true docker rm "$container_name" 2>/dev/null || true @@ -252,19 +254,24 @@ if [[ "$STACK_CONFIG" == *"docker:"* && "$COLLECT_ONLY" == false ]]; then export LLAMA_STACK_PORT=8321 echo "=== Building Docker Image for distribution: $DISTRO ===" - # Set LLAMA_STACK_DIR to repo root - # USE_COPY_NOT_MOUNT copies files into image (for CI), otherwise mounts for live development - BUILD_ENV="LLAMA_STACK_DIR=$ROOT_DIR" - if [[ "${CI:-false}" == "true" ]] || [[ "${GITHUB_ACTIONS:-false}" == "true" ]]; then - echo "CI detected (CI=$CI, GITHUB_ACTIONS=$GITHUB_ACTIONS): copying source into image" - BUILD_ENV="USE_COPY_NOT_MOUNT=true $BUILD_ENV" - else - echo "Local mode: will mount source for live development" + containerfile="$ROOT_DIR/containers/Containerfile" + if [[ ! 
-f "$containerfile" ]]; then + echo "❌ Containerfile not found at $containerfile" + exit 1 fi - eval "$BUILD_ENV llama stack build --distro '$DISTRO' --image-type container" + build_cmd=( + docker + build + "$ROOT_DIR" + -f "$containerfile" + --tag "localhost/distribution-$DISTRO:dev" + --build-arg "DISTRO_NAME=$DISTRO" + --build-arg "INSTALL_MODE=editable" + --build-arg "LLAMA_STACK_DIR=/workspace" + ) - if [ $? -ne 0 ]; then + if ! "${build_cmd[@]}"; then echo "❌ Failed to build Docker image" exit 1 fi @@ -304,7 +311,6 @@ if [[ "$STACK_CONFIG" == *"docker:"* && "$COLLECT_ONLY" == false ]]; then docker run -d --network host --name "$container_name" \ -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ $DOCKER_ENV_VARS \ - -v $ROOT_DIR:/app/llama-stack-source \ "$IMAGE_NAME" \ --port $LLAMA_STACK_PORT @@ -404,6 +410,21 @@ elif [ $exit_code -eq 5 ]; then echo "⚠️ No tests collected (pattern matched no tests)" else echo "❌ Tests failed" + echo "" + echo "=== Dumping last 100 lines of logs for debugging ===" + + # Output server or container logs based on stack config + if [[ "$STACK_CONFIG" == *"server:"* && -f "server.log" ]]; then + echo "--- Last 100 lines of server.log ---" + tail -100 server.log + elif [[ "$STACK_CONFIG" == *"docker:"* ]]; then + docker_log_file="docker-${DISTRO}-${INFERENCE_MODE}.log" + if [[ -f "$docker_log_file" ]]; then + echo "--- Last 100 lines of $docker_log_file ---" + tail -100 "$docker_log_file" + fi + fi + exit 1 fi diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index a258eb1a0..041d10f10 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -37,6 +37,9 @@ def pytest_sessionstart(session): if "LLAMA_STACK_TEST_INFERENCE_MODE" not in os.environ: os.environ["LLAMA_STACK_TEST_INFERENCE_MODE"] = "replay" + if "LLAMA_STACK_LOGGING" not in os.environ: + os.environ["LLAMA_STACK_LOGGING"] = "all=warning" + if "SQLITE_STORE_DIR" not in os.environ: os.environ["SQLITE_STORE_DIR"] = tempfile.mkdtemp() diff --git a/tests/integration/fixtures/common.py b/tests/integration/fixtures/common.py index ffd49033d..6a9e1f3b2 100644 --- a/tests/integration/fixtures/common.py +++ b/tests/integration/fixtures/common.py @@ -40,7 +40,7 @@ def is_port_available(port: int, host: str = "localhost") -> bool: def start_llama_stack_server(config_name: str) -> subprocess.Popen: """Start a llama stack server with the given config.""" - cmd = f"uv run --with llama-stack llama stack build --distro {config_name} --image-type venv --run" + cmd = f"uv run llama stack run {config_name}" devnull = open(os.devnull, "w") process = subprocess.Popen( shlex.split(cmd), diff --git a/tests/integration/vector_io/test_vector_io.py b/tests/integration/vector_io/test_vector_io.py index e5ca7a0db..1f67ddb24 100644 --- a/tests/integration/vector_io/test_vector_io.py +++ b/tests/integration/vector_io/test_vector_io.py @@ -49,46 +49,50 @@ def client_with_empty_registry(client_with_models): @vector_provider_wrapper -def test_vector_db_retrieve(client_with_empty_registry, embedding_model_id, embedding_dimension, vector_io_provider_id): - vector_db_name = "test_vector_db" +def test_vector_store_retrieve( + client_with_empty_registry, embedding_model_id, embedding_dimension, vector_io_provider_id +): + vector_store_name = "test_vector_store" create_response = client_with_empty_registry.vector_stores.create( - name=vector_db_name, + name=vector_store_name, extra_body={ "provider_id": vector_io_provider_id, }, ) - actual_vector_db_id = create_response.id + 
actual_vector_store_id = create_response.id # Retrieve the vector store and validate its properties - response = client_with_empty_registry.vector_stores.retrieve(vector_store_id=actual_vector_db_id) + response = client_with_empty_registry.vector_stores.retrieve(vector_store_id=actual_vector_store_id) assert response is not None - assert response.id == actual_vector_db_id - assert response.name == vector_db_name + assert response.id == actual_vector_store_id + assert response.name == vector_store_name assert response.id.startswith("vs_") @vector_provider_wrapper -def test_vector_db_register(client_with_empty_registry, embedding_model_id, embedding_dimension, vector_io_provider_id): - vector_db_name = "test_vector_db" +def test_vector_store_register( + client_with_empty_registry, embedding_model_id, embedding_dimension, vector_io_provider_id +): + vector_store_name = "test_vector_store" response = client_with_empty_registry.vector_stores.create( - name=vector_db_name, + name=vector_store_name, extra_body={ "provider_id": vector_io_provider_id, }, ) - actual_vector_db_id = response.id - assert actual_vector_db_id.startswith("vs_") - assert actual_vector_db_id != vector_db_name + actual_vector_store_id = response.id + assert actual_vector_store_id.startswith("vs_") + assert actual_vector_store_id != vector_store_name vector_stores = client_with_empty_registry.vector_stores.list() assert len(vector_stores.data) == 1 vector_store = vector_stores.data[0] - assert vector_store.id == actual_vector_db_id - assert vector_store.name == vector_db_name + assert vector_store.id == actual_vector_store_id + assert vector_store.name == vector_store_name - client_with_empty_registry.vector_stores.delete(vector_store_id=actual_vector_db_id) + client_with_empty_registry.vector_stores.delete(vector_store_id=actual_vector_store_id) vector_stores = client_with_empty_registry.vector_stores.list() assert len(vector_stores.data) == 0 @@ -108,23 +112,23 @@ def test_vector_db_register(client_with_empty_registry, embedding_model_id, embe def test_insert_chunks( client_with_empty_registry, embedding_model_id, embedding_dimension, sample_chunks, test_case, vector_io_provider_id ): - vector_db_name = "test_vector_db" + vector_store_name = "test_vector_store" create_response = client_with_empty_registry.vector_stores.create( - name=vector_db_name, + name=vector_store_name, extra_body={ "provider_id": vector_io_provider_id, }, ) - actual_vector_db_id = create_response.id + actual_vector_store_id = create_response.id client_with_empty_registry.vector_io.insert( - vector_db_id=actual_vector_db_id, + vector_db_id=actual_vector_store_id, chunks=sample_chunks, ) response = client_with_empty_registry.vector_io.query( - vector_db_id=actual_vector_db_id, + vector_db_id=actual_vector_store_id, query="What is the capital of France?", ) assert response is not None @@ -133,7 +137,7 @@ def test_insert_chunks( query, expected_doc_id = test_case response = client_with_empty_registry.vector_io.query( - vector_db_id=actual_vector_db_id, + vector_db_id=actual_vector_store_id, query=query, ) assert response is not None @@ -151,15 +155,15 @@ def test_insert_chunks_with_precomputed_embeddings( "inline::qdrant": {"score_threshold": -1.0}, "remote::qdrant": {"score_threshold": -1.0}, } - vector_db_name = "test_precomputed_embeddings_db" + vector_store_name = "test_precomputed_embeddings_db" register_response = client_with_empty_registry.vector_stores.create( - name=vector_db_name, + name=vector_store_name, extra_body={ "provider_id": 
vector_io_provider_id, }, ) - actual_vector_db_id = register_response.id + actual_vector_store_id = register_response.id chunks_with_embeddings = [ Chunk( @@ -170,13 +174,13 @@ def test_insert_chunks_with_precomputed_embeddings( ] client_with_empty_registry.vector_io.insert( - vector_db_id=actual_vector_db_id, + vector_db_id=actual_vector_store_id, chunks=chunks_with_embeddings, ) provider = [p.provider_id for p in client_with_empty_registry.providers.list() if p.api == "vector_io"][0] response = client_with_empty_registry.vector_io.query( - vector_db_id=actual_vector_db_id, + vector_db_id=actual_vector_store_id, query="precomputed embedding test", params=vector_io_provider_params_dict.get(provider, None), ) @@ -200,16 +204,16 @@ def test_query_returns_valid_object_when_identical_to_embedding_in_vdb( "remote::qdrant": {"score_threshold": 0.0}, "inline::qdrant": {"score_threshold": 0.0}, } - vector_db_name = "test_precomputed_embeddings_db" + vector_store_name = "test_precomputed_embeddings_db" register_response = client_with_empty_registry.vector_stores.create( - name=vector_db_name, + name=vector_store_name, extra_body={ "embedding_model": embedding_model_id, "provider_id": vector_io_provider_id, }, ) - actual_vector_db_id = register_response.id + actual_vector_store_id = register_response.id chunks_with_embeddings = [ Chunk( @@ -220,13 +224,13 @@ def test_query_returns_valid_object_when_identical_to_embedding_in_vdb( ] client_with_empty_registry.vector_io.insert( - vector_db_id=actual_vector_db_id, + vector_db_id=actual_vector_store_id, chunks=chunks_with_embeddings, ) provider = [p.provider_id for p in client_with_empty_registry.providers.list() if p.api == "vector_io"][0] response = client_with_empty_registry.vector_io.query( - vector_db_id=actual_vector_db_id, + vector_db_id=actual_vector_store_id, query="duplicate", params=vector_io_provider_params_dict.get(provider, None), ) diff --git a/tests/unit/core/routers/test_vector_io.py b/tests/unit/core/routers/test_vector_io.py index 997df0d78..dd3246cb3 100644 --- a/tests/unit/core/routers/test_vector_io.py +++ b/tests/unit/core/routers/test_vector_io.py @@ -21,7 +21,7 @@ async def test_single_provider_auto_selection(): Mock(identifier="all-MiniLM-L6-v2", model_type="embedding", metadata={"embedding_dimension": 384}) ] ) - mock_routing_table.register_vector_db = AsyncMock( + mock_routing_table.register_vector_store = AsyncMock( return_value=Mock(identifier="vs_123", provider_id="inline::faiss", provider_resource_id="vs_123") ) mock_routing_table.get_provider_impl = AsyncMock( diff --git a/tests/unit/distribution/test_build_path.py b/tests/unit/distribution/test_build_path.py deleted file mode 100644 index 52a71286b..000000000 --- a/tests/unit/distribution/test_build_path.py +++ /dev/null @@ -1,40 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
-
-from pathlib import Path
-
-from llama_stack.cli.stack._build import (
-    _run_stack_build_command_from_build_config,
-)
-from llama_stack.core.datatypes import BuildConfig, DistributionSpec
-from llama_stack.core.utils.image_types import LlamaStackImageType
-
-
-def test_container_build_passes_path(monkeypatch, tmp_path):
-    called_with = {}
-
-    def spy_build_image(build_config, image_name, distro_or_config, run_config=None):
-        called_with["path"] = distro_or_config
-        called_with["run_config"] = run_config
-        return 0
-
-    monkeypatch.setattr(
-        "llama_stack.cli.stack._build.build_image",
-        spy_build_image,
-        raising=True,
-    )
-
-    cfg = BuildConfig(
-        image_type=LlamaStackImageType.CONTAINER.value,
-        distribution_spec=DistributionSpec(providers={}, description=""),
-    )
-
-    _run_stack_build_command_from_build_config(cfg, image_name="dummy")
-
-    assert "path" in called_with
-    assert isinstance(called_with["path"], str)
-    assert Path(called_with["path"]).exists()
-    assert called_with["run_config"] is None
diff --git a/tests/unit/providers/utils/memory/test_vector_store.py b/tests/unit/providers/utils/memory/test_vector_store.py
index 590bdd1d2..3a5cd5bf7 100644
--- a/tests/unit/providers/utils/memory/test_vector_store.py
+++ b/tests/unit/providers/utils/memory/test_vector_store.py
@@ -4,138 +4,11 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from unittest.mock import AsyncMock, MagicMock, patch
+from unittest.mock import patch
 
 import pytest
 
-from llama_stack.apis.common.content_types import URL, TextContentItem
-from llama_stack.apis.tools import RAGDocument
-from llama_stack.providers.utils.memory.vector_store import content_from_data_and_mime_type, content_from_doc
-
-
-async def test_content_from_doc_with_url():
-    """Test extracting content from RAGDocument with URL content."""
-    mock_url = URL(uri="https://example.com")
-    mock_doc = RAGDocument(document_id="foo", content=mock_url)
-
-    mock_response = MagicMock()
-    mock_response.text = "Sample content from URL"
-
-    with patch("httpx.AsyncClient") as mock_client:
-        mock_instance = AsyncMock()
-        mock_instance.get.return_value = mock_response
-        mock_client.return_value.__aenter__.return_value = mock_instance
-
-        result = await content_from_doc(mock_doc)
-
-        assert result == "Sample content from URL"
-        mock_instance.get.assert_called_once_with(mock_url.uri)
-
-
-async def test_content_from_doc_with_pdf_url():
-    """Test extracting content from RAGDocument with URL pointing to a PDF."""
-    mock_url = URL(uri="https://example.com/document.pdf")
-    mock_doc = RAGDocument(document_id="foo", content=mock_url, mime_type="application/pdf")
-
-    mock_response = MagicMock()
-    mock_response.content = b"PDF binary data"
-
-    with (
-        patch("httpx.AsyncClient") as mock_client,
-        patch("llama_stack.providers.utils.memory.vector_store.parse_pdf") as mock_parse_pdf,
-    ):
-        mock_instance = AsyncMock()
-        mock_instance.get.return_value = mock_response
-        mock_client.return_value.__aenter__.return_value = mock_instance
-        mock_parse_pdf.return_value = "Extracted PDF content"
-
-        result = await content_from_doc(mock_doc)
-
-        assert result == "Extracted PDF content"
-        mock_instance.get.assert_called_once_with(mock_url.uri)
-        mock_parse_pdf.assert_called_once_with(b"PDF binary data")
-
-
-async def test_content_from_doc_with_data_url():
-    """Test extracting content from RAGDocument with data URL content."""
-    data_url = "data:text/plain;base64,SGVsbG8gV29ybGQ="  # "Hello World" base64 encoded
-    mock_url = URL(uri=data_url)
-    mock_doc = RAGDocument(document_id="foo", content=mock_url)
-
-    with patch("llama_stack.providers.utils.memory.vector_store.content_from_data") as mock_content_from_data:
-        mock_content_from_data.return_value = "Hello World"
-
-        result = await content_from_doc(mock_doc)
-
-        assert result == "Hello World"
-        mock_content_from_data.assert_called_once_with(data_url)
-
-
-async def test_content_from_doc_with_string():
-    """Test extracting content from RAGDocument with string content."""
-    content_string = "This is plain text content"
-    mock_doc = RAGDocument(document_id="foo", content=content_string)
-
-    result = await content_from_doc(mock_doc)
-
-    assert result == content_string
-
-
-async def test_content_from_doc_with_string_url():
-    """Test extracting content from RAGDocument with string URL content."""
-    url_string = "https://example.com"
-    mock_doc = RAGDocument(document_id="foo", content=url_string)
-
-    mock_response = MagicMock()
-    mock_response.text = "Sample content from URL string"
-
-    with patch("httpx.AsyncClient") as mock_client:
-        mock_instance = AsyncMock()
-        mock_instance.get.return_value = mock_response
-        mock_client.return_value.__aenter__.return_value = mock_instance
-
-        result = await content_from_doc(mock_doc)
-
-        assert result == "Sample content from URL string"
-        mock_instance.get.assert_called_once_with(url_string)
-
-
-async def test_content_from_doc_with_string_pdf_url():
-    """Test extracting content from RAGDocument with string URL pointing to a PDF."""
-    url_string = "https://example.com/document.pdf"
-    mock_doc = RAGDocument(document_id="foo", content=url_string, mime_type="application/pdf")
-
-    mock_response = MagicMock()
-    mock_response.content = b"PDF binary data"
-
-    with (
-        patch("httpx.AsyncClient") as mock_client,
-        patch("llama_stack.providers.utils.memory.vector_store.parse_pdf") as mock_parse_pdf,
-    ):
-        mock_instance = AsyncMock()
-        mock_instance.get.return_value = mock_response
-        mock_client.return_value.__aenter__.return_value = mock_instance
-        mock_parse_pdf.return_value = "Extracted PDF content from string URL"
-
-        result = await content_from_doc(mock_doc)
-
-        assert result == "Extracted PDF content from string URL"
-        mock_instance.get.assert_called_once_with(url_string)
-        mock_parse_pdf.assert_called_once_with(b"PDF binary data")
-
-
-async def test_content_from_doc_with_interleaved_content():
-    """Test extracting content from RAGDocument with InterleavedContent (the new case added in the commit)."""
-    interleaved_content = [TextContentItem(text="First item"), TextContentItem(text="Second item")]
-    mock_doc = RAGDocument(document_id="foo", content=interleaved_content)
-
-    with patch("llama_stack.providers.utils.memory.vector_store.interleaved_content_as_str") as mock_interleaved:
-        mock_interleaved.return_value = "First item\nSecond item"
-
-        result = await content_from_doc(mock_doc)
-
-        assert result == "First item\nSecond item"
-        mock_interleaved.assert_called_once_with(interleaved_content)
+from llama_stack.providers.utils.memory.vector_store import content_from_data_and_mime_type
 
 
 def test_content_from_data_and_mime_type_success_utf8():
@@ -178,41 +51,3 @@ def test_content_from_data_and_mime_type_both_encodings_fail():
     # Should raise an exception instead of returning empty string
     with pytest.raises(UnicodeDecodeError):
         content_from_data_and_mime_type(data, mime_type)
-
-
-async def test_memory_tool_error_handling():
-    """Test that memory tool handles various failures gracefully without crashing."""
-    from llama_stack.providers.inline.tool_runtime.rag.config import RagToolRuntimeConfig
-    from llama_stack.providers.inline.tool_runtime.rag.memory import MemoryToolRuntimeImpl
-
-    config = RagToolRuntimeConfig()
-    memory_tool = MemoryToolRuntimeImpl(
-        config=config,
-        vector_io_api=AsyncMock(),
-        inference_api=AsyncMock(),
-        files_api=AsyncMock(),
-    )
-
-    docs = [
-        RAGDocument(document_id="good_doc", content="Good content", metadata={}),
-        RAGDocument(document_id="bad_url_doc", content=URL(uri="https://bad.url"), metadata={}),
-        RAGDocument(document_id="another_good_doc", content="Another good content", metadata={}),
-    ]
-
-    mock_file1 = MagicMock()
-    mock_file1.id = "file_good1"
-    mock_file2 = MagicMock()
-    mock_file2.id = "file_good2"
-    memory_tool.files_api.openai_upload_file.side_effect = [mock_file1, mock_file2]
-
-    with patch("httpx.AsyncClient") as mock_client:
-        mock_instance = AsyncMock()
-        mock_instance.get.side_effect = Exception("Bad URL")
-        mock_client.return_value.__aenter__.return_value = mock_instance
-
-        # won't raise exception despite one document failing
-        await memory_tool.insert(docs, "vector_store_123")
-
-        # processed 2 documents successfully, skipped 1
-        assert memory_tool.files_api.openai_upload_file.call_count == 2
-        assert memory_tool.vector_io_api.openai_attach_file_to_vector_store.call_count == 2
diff --git a/tests/unit/providers/vector_io/conftest.py b/tests/unit/providers/vector_io/conftest.py
index c78596018..2951ca2e5 100644
--- a/tests/unit/providers/vector_io/conftest.py
+++ b/tests/unit/providers/vector_io/conftest.py
@@ -10,8 +10,8 @@ from unittest.mock import AsyncMock, MagicMock, patch
 import numpy as np
 import pytest
 
-from llama_stack.apis.vector_dbs import VectorDB
 from llama_stack.apis.vector_io import Chunk, ChunkMetadata, QueryChunksResponse
+from llama_stack.apis.vector_stores import VectorStore
 from llama_stack.core.storage.datatypes import KVStoreReference, SqliteKVStoreConfig
 from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig
 from llama_stack.providers.inline.vector_io.faiss.faiss import FaissIndex, FaissVectorIOAdapter
@@ -31,7 +31,7 @@ def vector_provider(request):
 
 
 @pytest.fixture
-def vector_db_id() -> str:
+def vector_store_id() -> str:
     return f"test-vector-db-{random.randint(1, 100)}"
@@ -149,8 +149,8 @@ async def sqlite_vec_adapter(sqlite_vec_db_path, unique_kvstore_config, mock_inf
     )
     collection_id = f"sqlite_test_collection_{np.random.randint(1e6)}"
     await adapter.initialize()
-    await adapter.register_vector_db(
-        VectorDB(
+    await adapter.register_vector_store(
+        VectorStore(
             identifier=collection_id,
             provider_id="test_provider",
             embedding_model="test_model",
@@ -186,8 +186,8 @@ async def faiss_vec_adapter(unique_kvstore_config, mock_inference_api, embedding
         files_api=None,
     )
     await adapter.initialize()
-    await adapter.register_vector_db(
-        VectorDB(
+    await adapter.register_vector_store(
+        VectorStore(
             identifier=f"faiss_test_collection_{np.random.randint(1e6)}",
             provider_id="test_provider",
             embedding_model="test_model",
@@ -215,7 +215,7 @@ def mock_psycopg2_connection():
 async def pgvector_vec_index(embedding_dimension, mock_psycopg2_connection):
     connection, cursor = mock_psycopg2_connection
 
-    vector_db = VectorDB(
+    vector_store = VectorStore(
         identifier="test-vector-db",
         embedding_model="test-model",
         embedding_dimension=embedding_dimension,
@@ -225,7 +225,7 @@ async def pgvector_vec_index(embedding_dimension, mock_psycopg2_connection):
     with patch("llama_stack.providers.remote.vector_io.pgvector.pgvector.psycopg2"):
         with patch("llama_stack.providers.remote.vector_io.pgvector.pgvector.execute_values"):
-            index = PGVectorIndex(vector_db, embedding_dimension, connection, distance_metric="COSINE")
+            index = PGVectorIndex(vector_store, embedding_dimension, connection, distance_metric="COSINE")
             index._test_chunks = []
             original_add_chunks = index.add_chunks
@@ -281,30 +281,30 @@ async def pgvector_vec_adapter(unique_kvstore_config, mock_inference_api, embedd
     await adapter.initialize()
     adapter.conn = mock_conn
 
-    async def mock_insert_chunks(vector_db_id, chunks, ttl_seconds=None):
-        index = await adapter._get_and_cache_vector_db_index(vector_db_id)
+    async def mock_insert_chunks(vector_store_id, chunks, ttl_seconds=None):
+        index = await adapter._get_and_cache_vector_store_index(vector_store_id)
         if not index:
-            raise ValueError(f"Vector DB {vector_db_id} not found")
+            raise ValueError(f"Vector DB {vector_store_id} not found")
         await index.insert_chunks(chunks)
 
     adapter.insert_chunks = mock_insert_chunks
 
-    async def mock_query_chunks(vector_db_id, query, params=None):
-        index = await adapter._get_and_cache_vector_db_index(vector_db_id)
+    async def mock_query_chunks(vector_store_id, query, params=None):
+        index = await adapter._get_and_cache_vector_store_index(vector_store_id)
         if not index:
-            raise ValueError(f"Vector DB {vector_db_id} not found")
+            raise ValueError(f"Vector DB {vector_store_id} not found")
         return await index.query_chunks(query, params)
 
     adapter.query_chunks = mock_query_chunks
 
-    test_vector_db = VectorDB(
+    test_vector_store = VectorStore(
         identifier=f"pgvector_test_collection_{random.randint(1, 1_000_000)}",
         provider_id="test_provider",
         embedding_model="test_model",
         embedding_dimension=embedding_dimension,
     )
-    await adapter.register_vector_db(test_vector_db)
-    adapter.test_collection_id = test_vector_db.identifier
+    await adapter.register_vector_store(test_vector_store)
+    adapter.test_collection_id = test_vector_store.identifier
 
     yield adapter
     await adapter.shutdown()
diff --git a/tests/unit/providers/vector_io/test_faiss.py b/tests/unit/providers/vector_io/test_faiss.py
index fa5c5f56b..7b870d16e 100644
--- a/tests/unit/providers/vector_io/test_faiss.py
+++ b/tests/unit/providers/vector_io/test_faiss.py
@@ -11,8 +11,8 @@ import numpy as np
 import pytest
 
 from llama_stack.apis.files import Files
-from llama_stack.apis.vector_dbs import VectorDB
 from llama_stack.apis.vector_io import Chunk, QueryChunksResponse
+from llama_stack.apis.vector_stores import VectorStore
 from llama_stack.providers.datatypes import HealthStatus
 from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig
 from llama_stack.providers.inline.vector_io.faiss.faiss import (
@@ -43,8 +43,8 @@ def embedding_dimension():
 
 
 @pytest.fixture
-def vector_db_id():
-    return "test_vector_db"
+def vector_store_id():
+    return "test_vector_store"
 
 
 @pytest.fixture
@@ -61,12 +61,12 @@ def sample_embeddings(embedding_dimension):
 
 
 @pytest.fixture
-def mock_vector_db(vector_db_id, embedding_dimension) -> MagicMock:
-    mock_vector_db = MagicMock(spec=VectorDB)
-    mock_vector_db.embedding_model = "mock_embedding_model"
-    mock_vector_db.identifier = vector_db_id
-    mock_vector_db.embedding_dimension = embedding_dimension
-    return mock_vector_db
+def mock_vector_store(vector_store_id, embedding_dimension) -> MagicMock:
+    mock_vector_store = MagicMock(spec=VectorStore)
+    mock_vector_store.embedding_model = "mock_embedding_model"
+    mock_vector_store.identifier = vector_store_id
+    mock_vector_store.embedding_dimension = embedding_dimension
+    return mock_vector_store
 
 
 @pytest.fixture
diff --git a/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py b/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py
index ad55b9336..65d7b7602 100644
--- a/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py
+++ b/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py
@@ -12,7 +12,6 @@ import numpy as np
 import pytest
 
 from llama_stack.apis.common.errors import VectorStoreNotFoundError
-from llama_stack.apis.vector_dbs import VectorDB
 from llama_stack.apis.vector_io import (
     Chunk,
     OpenAICreateVectorStoreFileBatchRequestWithExtraBody,
@@ -21,6 +20,7 @@ from llama_stack.apis.vector_io import (
     VectorStoreChunkingStrategyAuto,
     VectorStoreFileObject,
 )
+from llama_stack.apis.vector_stores import VectorStore
 from llama_stack.providers.inline.vector_io.sqlite_vec.sqlite_vec import VECTOR_DBS_PREFIX
 
 # This test is a unit test for the inline VectorIO providers. This should only contain
@@ -71,7 +71,7 @@ async def test_chunk_id_conflict(vector_index, sample_chunks, embedding_dimensio
 
 async def test_initialize_adapter_with_existing_kvstore(vector_io_adapter):
     key = f"{VECTOR_DBS_PREFIX}db1"
-    dummy = VectorDB(
+    dummy = VectorStore(
         identifier="foo_db", provider_id="test_provider", embedding_model="test_model", embedding_dimension=128
     )
     await vector_io_adapter.kvstore.set(key=key, value=json.dumps(dummy.model_dump()))
@@ -81,10 +81,10 @@ async def test_initialize_adapter_with_existing_kvstore(vector_io_adapter):
 
 async def test_persistence_across_adapter_restarts(vector_io_adapter):
     await vector_io_adapter.initialize()
-    dummy = VectorDB(
+    dummy = VectorStore(
         identifier="foo_db", provider_id="test_provider", embedding_model="test_model", embedding_dimension=128
     )
-    await vector_io_adapter.register_vector_db(dummy)
+    await vector_io_adapter.register_vector_store(dummy)
     await vector_io_adapter.shutdown()
 
     await vector_io_adapter.initialize()
@@ -92,15 +92,15 @@ async def test_persistence_across_adapter_restarts(vector_io_adapter):
     await vector_io_adapter.shutdown()
 
 
-async def test_register_and_unregister_vector_db(vector_io_adapter):
+async def test_register_and_unregister_vector_store(vector_io_adapter):
     unique_id = f"foo_db_{np.random.randint(1e6)}"
-    dummy = VectorDB(
+    dummy = VectorStore(
         identifier=unique_id, provider_id="test_provider", embedding_model="test_model", embedding_dimension=128
     )
-    await vector_io_adapter.register_vector_db(dummy)
+    await vector_io_adapter.register_vector_store(dummy)
     assert dummy.identifier in vector_io_adapter.cache
-    await vector_io_adapter.unregister_vector_db(dummy.identifier)
+    await vector_io_adapter.unregister_vector_store(dummy.identifier)
     assert dummy.identifier not in vector_io_adapter.cache
 
 
@@ -121,7 +121,7 @@ async def test_insert_chunks_calls_underlying_index(vector_io_adapter):
 
 
 async def test_insert_chunks_missing_db_raises(vector_io_adapter):
-    vector_io_adapter._get_and_cache_vector_db_index = AsyncMock(return_value=None)
+    vector_io_adapter._get_and_cache_vector_store_index = AsyncMock(return_value=None)
 
     with pytest.raises(ValueError):
         await vector_io_adapter.insert_chunks("db_not_exist", [])
@@ -170,7 +170,7 @@ async def test_query_chunks_calls_underlying_index_and_returns(vector_io_adapter
 
 
 async def test_query_chunks_missing_db_raises(vector_io_adapter):
-    vector_io_adapter._get_and_cache_vector_db_index = AsyncMock(return_value=None)
+    vector_io_adapter._get_and_cache_vector_store_index = AsyncMock(return_value=None)
 
     with pytest.raises(ValueError):
         await vector_io_adapter.query_chunks("db_missing", "q", None)
@@ -182,7 +182,7 @@ async def test_save_openai_vector_store(vector_io_adapter):
         "id": store_id,
         "name": "Test Store",
         "description": "A test OpenAI vector store",
-        "vector_db_id": "test_db",
+        "vector_store_id": "test_db",
         "embedding_model": "test_model",
     }
@@ -198,7 +198,7 @@ async def test_update_openai_vector_store(vector_io_adapter):
         "id": store_id,
         "name": "Test Store",
         "description": "A test OpenAI vector store",
-        "vector_db_id": "test_db",
+        "vector_store_id": "test_db",
         "embedding_model": "test_model",
     }
@@ -214,7 +214,7 @@ async def test_delete_openai_vector_store(vector_io_adapter):
         "id": store_id,
         "name": "Test Store",
         "description": "A test OpenAI vector store",
-        "vector_db_id": "test_db",
+        "vector_store_id": "test_db",
         "embedding_model": "test_model",
     }
@@ -229,7 +229,7 @@ async def test_load_openai_vector_stores(vector_io_adapter):
         "id": store_id,
         "name": "Test Store",
         "description": "A test OpenAI vector store",
-        "vector_db_id": "test_db",
+        "vector_store_id": "test_db",
         "embedding_model": "test_model",
     }
@@ -998,8 +998,8 @@ async def test_max_concurrent_files_per_batch(vector_io_adapter):
 
 async def test_embedding_config_from_metadata(vector_io_adapter):
     """Test that embedding configuration is correctly extracted from metadata."""
-    # Mock register_vector_db to avoid actual registration
-    vector_io_adapter.register_vector_db = AsyncMock()
+    # Mock register_vector_store to avoid actual registration
+    vector_io_adapter.register_vector_store = AsyncMock()
 
     # Set provider_id attribute for the adapter
     vector_io_adapter.__provider_id__ = "test_provider"
@@ -1015,9 +1015,9 @@ async def test_embedding_config_from_metadata(vector_io_adapter):
 
     await vector_io_adapter.openai_create_vector_store(params)
 
-    # Verify VectorDB was registered with correct embedding config from metadata
-    vector_io_adapter.register_vector_db.assert_called_once()
-    call_args = vector_io_adapter.register_vector_db.call_args[0][0]
+    # Verify VectorStore was registered with correct embedding config from metadata
+    vector_io_adapter.register_vector_store.assert_called_once()
+    call_args = vector_io_adapter.register_vector_store.call_args[0][0]
 
     assert call_args.embedding_model == "test-embedding-model"
     assert call_args.embedding_dimension == 512
@@ -1025,8 +1025,8 @@ async def test_embedding_config_from_metadata(vector_io_adapter):
 
 async def test_embedding_config_from_extra_body(vector_io_adapter):
     """Test that embedding configuration is correctly extracted from extra_body when metadata is empty."""
-    # Mock register_vector_db to avoid actual registration
-    vector_io_adapter.register_vector_db = AsyncMock()
+    # Mock register_vector_store to avoid actual registration
+    vector_io_adapter.register_vector_store = AsyncMock()
 
     # Set provider_id attribute for the adapter
     vector_io_adapter.__provider_id__ = "test_provider"
@@ -1042,9 +1042,9 @@ async def test_embedding_config_from_extra_body(vector_io_adapter):
 
     await vector_io_adapter.openai_create_vector_store(params)
 
-    # Verify VectorDB was registered with correct embedding config from extra_body
-    vector_io_adapter.register_vector_db.assert_called_once()
-    call_args = vector_io_adapter.register_vector_db.call_args[0][0]
+    # Verify VectorStore was registered with correct embedding config from extra_body
+    vector_io_adapter.register_vector_store.assert_called_once()
+    call_args = vector_io_adapter.register_vector_store.call_args[0][0]
 
     assert call_args.embedding_model == "extra-body-model"
     assert call_args.embedding_dimension == 1024
@@ -1052,8 +1052,8 @@ async def test_embedding_config_from_extra_body(vector_io_adapter):
 
 async def test_embedding_config_consistency_check_passes(vector_io_adapter):
     """Test that consistent embedding config in both metadata and extra_body passes validation."""
-    # Mock register_vector_db to avoid actual registration
-    vector_io_adapter.register_vector_db = AsyncMock()
+    # Mock register_vector_store to avoid actual registration
+    vector_io_adapter.register_vector_store = AsyncMock()
 
     # Set provider_id attribute for the adapter
     vector_io_adapter.__provider_id__ = "test_provider"
@@ -1073,8 +1073,8 @@ async def test_embedding_config_consistency_check_passes(vector_io_adapter):
     await vector_io_adapter.openai_create_vector_store(params)
 
     # Should not raise any error and use metadata config
-    vector_io_adapter.register_vector_db.assert_called_once()
-    call_args = vector_io_adapter.register_vector_db.call_args[0][0]
+    vector_io_adapter.register_vector_store.assert_called_once()
+    call_args = vector_io_adapter.register_vector_store.call_args[0][0]
 
     assert call_args.embedding_model == "consistent-model"
     assert call_args.embedding_dimension == 768
@@ -1082,8 +1082,8 @@ async def test_embedding_config_consistency_check_passes(vector_io_adapter):
 
 async def test_embedding_config_inconsistency_errors(vector_io_adapter):
     """Test that inconsistent embedding config between metadata and extra_body raises errors."""
-    # Mock register_vector_db to avoid actual registration
-    vector_io_adapter.register_vector_db = AsyncMock()
+    # Mock register_vector_store to avoid actual registration
+    vector_io_adapter.register_vector_store = AsyncMock()
 
     # Set provider_id attribute for the adapter
     vector_io_adapter.__provider_id__ = "test_provider"
@@ -1104,7 +1104,7 @@ async def test_embedding_config_inconsistency_errors(vector_io_adapter):
         await vector_io_adapter.openai_create_vector_store(params)
 
     # Reset mock for second test
-    vector_io_adapter.register_vector_db.reset_mock()
+    vector_io_adapter.register_vector_store.reset_mock()
 
     # Test with inconsistent embedding dimension
     params = OpenAICreateVectorStoreRequestWithExtraBody(
@@ -1126,8 +1126,8 @@ async def test_embedding_config_inconsistency_errors(vector_io_adapter):
 
 async def test_embedding_config_defaults_when_missing(vector_io_adapter):
     """Test that embedding dimension defaults to 768 when not provided."""
-    # Mock register_vector_db to avoid actual registration
-    vector_io_adapter.register_vector_db = AsyncMock()
+    # Mock register_vector_store to avoid actual registration
+    vector_io_adapter.register_vector_store = AsyncMock()
 
     # Set provider_id attribute for the adapter
     vector_io_adapter.__provider_id__ = "test_provider"
@@ -1143,8 +1143,8 @@ async def test_embedding_config_defaults_when_missing(vector_io_adapter):
     await vector_io_adapter.openai_create_vector_store(params)
 
     # Should default to 768 dimensions
-    vector_io_adapter.register_vector_db.assert_called_once()
-    call_args = vector_io_adapter.register_vector_db.call_args[0][0]
+    vector_io_adapter.register_vector_store.assert_called_once()
+    call_args = vector_io_adapter.register_vector_store.call_args[0][0]
 
     assert call_args.embedding_model == "model-without-dimension"
     assert call_args.embedding_dimension == 768
@@ -1152,8 +1152,8 @@ async def test_embedding_config_defaults_when_missing(vector_io_adapter):
 
 async def test_embedding_config_required_model_missing(vector_io_adapter):
     """Test that missing embedding model raises error."""
-    # Mock register_vector_db to avoid actual registration
-    vector_io_adapter.register_vector_db = AsyncMock()
+    # Mock register_vector_store to avoid actual registration
+    vector_io_adapter.register_vector_store = AsyncMock()
 
     # Set provider_id attribute for the adapter
     vector_io_adapter.__provider_id__ = "test_provider"
     # Mock the default model lookup to return None (no default model available)
diff --git a/tests/unit/rag/test_rag_query.py b/tests/unit/rag/test_rag_query.py
deleted file mode 100644
index a45b66f02..000000000
--- a/tests/unit/rag/test_rag_query.py
+++ /dev/null
@@ -1,138 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-from unittest.mock import AsyncMock, MagicMock
-
-import pytest
-
-from llama_stack.apis.tools.rag_tool import RAGQueryConfig
-from llama_stack.apis.vector_io import (
-    Chunk,
-    ChunkMetadata,
-    QueryChunksResponse,
-)
-from llama_stack.providers.inline.tool_runtime.rag.memory import MemoryToolRuntimeImpl
-
-
-class TestRagQuery:
-    async def test_query_raises_on_empty_vector_db_ids(self):
-        rag_tool = MemoryToolRuntimeImpl(
-            config=MagicMock(), vector_io_api=MagicMock(), inference_api=MagicMock(), files_api=MagicMock()
-        )
-        with pytest.raises(ValueError):
-            await rag_tool.query(content=MagicMock(), vector_db_ids=[])
-
-    async def test_query_chunk_metadata_handling(self):
-        rag_tool = MemoryToolRuntimeImpl(
-            config=MagicMock(), vector_io_api=MagicMock(), inference_api=MagicMock(), files_api=MagicMock()
-        )
-        content = "test query content"
-        vector_db_ids = ["db1"]
-
-        chunk_metadata = ChunkMetadata(
-            document_id="doc1",
-            chunk_id="chunk1",
-            source="test_source",
-            metadata_token_count=5,
-        )
-        interleaved_content = MagicMock()
-        chunk = Chunk(
-            content=interleaved_content,
-            metadata={
-                "key1": "value1",
-                "token_count": 10,
-                "metadata_token_count": 5,
-                # Note this is inserted into `metadata` during MemoryToolRuntimeImpl().insert()
-                "document_id": "doc1",
-            },
-            stored_chunk_id="chunk1",
-            chunk_metadata=chunk_metadata,
-        )
-
-        query_response = QueryChunksResponse(chunks=[chunk], scores=[1.0])
-
-        rag_tool.vector_io_api.query_chunks = AsyncMock(return_value=query_response)
-        result = await rag_tool.query(content=content, vector_db_ids=vector_db_ids)
-
-        assert result is not None
-        expected_metadata_string = (
-            "Metadata: {'chunk_id': 'chunk1', 'document_id': 'doc1', 'source': 'test_source', 'key1': 'value1'}"
-        )
-        assert expected_metadata_string in result.content[1].text
-        assert result.content is not None
-
-    async def test_query_raises_incorrect_mode(self):
-        with pytest.raises(ValueError):
-            RAGQueryConfig(mode="invalid_mode")
-
-    async def test_query_accepts_valid_modes(self):
-        default_config = RAGQueryConfig()  # Test default (vector)
-        assert default_config.mode == "vector"
-        vector_config = RAGQueryConfig(mode="vector")  # Test vector
-        assert vector_config.mode == "vector"
-        keyword_config = RAGQueryConfig(mode="keyword")  # Test keyword
-        assert keyword_config.mode == "keyword"
-        hybrid_config = RAGQueryConfig(mode="hybrid")  # Test hybrid
-        assert hybrid_config.mode == "hybrid"
-
-        # Test that invalid mode raises an error
-        with pytest.raises(ValueError):
-            RAGQueryConfig(mode="wrong_mode")
-
-    async def test_query_adds_vector_db_id_to_chunk_metadata(self):
-        rag_tool = MemoryToolRuntimeImpl(
-            config=MagicMock(),
-            vector_io_api=MagicMock(),
-            inference_api=MagicMock(),
-            files_api=MagicMock(),
-        )
-
-        vector_db_ids = ["db1", "db2"]
-
-        # Fake chunks from each DB
-        chunk_metadata1 = ChunkMetadata(
-            document_id="doc1",
-            chunk_id="chunk1",
-            source="test_source1",
-            metadata_token_count=5,
-        )
-        chunk1 = Chunk(
-            content="chunk from db1",
-            metadata={"vector_db_id": "db1", "document_id": "doc1"},
-            stored_chunk_id="c1",
-            chunk_metadata=chunk_metadata1,
-        )
-
-        chunk_metadata2 = ChunkMetadata(
-            document_id="doc2",
-            chunk_id="chunk2",
-            source="test_source2",
-            metadata_token_count=5,
-        )
-        chunk2 = Chunk(
-            content="chunk from db2",
-            metadata={"vector_db_id": "db2", "document_id": "doc2"},
-            stored_chunk_id="c2",
-            chunk_metadata=chunk_metadata2,
-        )
-
-        rag_tool.vector_io_api.query_chunks = AsyncMock(
-            side_effect=[
-                QueryChunksResponse(chunks=[chunk1], scores=[0.9]),
-                QueryChunksResponse(chunks=[chunk2], scores=[0.8]),
-            ]
-        )
-
-        result = await rag_tool.query(content="test", vector_db_ids=vector_db_ids)
-        returned_chunks = result.metadata["chunks"]
-        returned_scores = result.metadata["scores"]
-        returned_doc_ids = result.metadata["document_ids"]
-        returned_vector_db_ids = result.metadata["vector_db_ids"]
-
-        assert returned_chunks == ["chunk from db1", "chunk from db2"]
-        assert returned_scores == (0.9, 0.8)
-        assert returned_doc_ids == ["doc1", "doc2"]
-        assert returned_vector_db_ids == ["db1", "db2"]
diff --git a/tests/unit/rag/test_vector_store.py b/tests/unit/rag/test_vector_store.py
index 1e40c98e8..e185b83e7 100644
--- a/tests/unit/rag/test_vector_store.py
+++ b/tests/unit/rag/test_vector_store.py
@@ -4,10 +4,6 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
-import base64
-import mimetypes
-import os
-from pathlib import Path
 from unittest.mock import AsyncMock, MagicMock
 
 import numpy as np
@@ -17,37 +13,13 @@ from llama_stack.apis.inference.inference import (
     OpenAIEmbeddingData,
     OpenAIEmbeddingsRequestWithExtraBody,
 )
-from llama_stack.apis.tools import RAGDocument
 from llama_stack.apis.vector_io import Chunk
 from llama_stack.providers.utils.memory.vector_store import (
-    URL,
-    VectorDBWithIndex,
+    VectorStoreWithIndex,
     _validate_embedding,
-    content_from_doc,
     make_overlapped_chunks,
 )
 
-DUMMY_PDF_PATH = Path(os.path.abspath(__file__)).parent / "fixtures" / "dummy.pdf"
-# Depending on the machine, this can get parsed a couple of ways
-DUMMY_PDF_TEXT_CHOICES = ["Dummy PDF file", "Dumm y PDF file"]
-
-
-def read_file(file_path: str) -> bytes:
-    with open(file_path, "rb") as file:
-        return file.read()
-
-
-def data_url_from_file(file_path: str) -> str:
-    with open(file_path, "rb") as file:
-        file_content = file.read()
-
-    base64_content = base64.b64encode(file_content).decode("utf-8")
-    mime_type, _ = mimetypes.guess_type(file_path)
-
-    data_url = f"data:{mime_type};base64,{base64_content}"
-
-    return data_url
-
 
 class TestChunk:
     def test_chunk(self):
@@ -116,45 +88,6 @@ class TestValidateEmbedding:
 
 
 class TestVectorStore:
-    async def test_returns_content_from_pdf_data_uri(self):
-        data_uri = data_url_from_file(DUMMY_PDF_PATH)
-        doc = RAGDocument(
-            document_id="dummy",
-            content=data_uri,
-            mime_type="application/pdf",
-            metadata={},
-        )
-        content = await content_from_doc(doc)
-        assert content in DUMMY_PDF_TEXT_CHOICES
-
-    @pytest.mark.allow_network
-    async def test_downloads_pdf_and_returns_content(self):
-        # Using GitHub to host the PDF file
-        url = "https://raw.githubusercontent.com/meta-llama/llama-stack/da035d69cfca915318eaf485770a467ca3c2a238/llama_stack/providers/tests/memory/fixtures/dummy.pdf"
-        doc = RAGDocument(
-            document_id="dummy",
-            content=url,
-            mime_type="application/pdf",
-            metadata={},
-        )
-        content = await content_from_doc(doc)
-        assert content in DUMMY_PDF_TEXT_CHOICES
-
-    @pytest.mark.allow_network
-    async def test_downloads_pdf_and_returns_content_with_url_object(self):
-        # Using GitHub to host the PDF file
-        url = "https://raw.githubusercontent.com/meta-llama/llama-stack/da035d69cfca915318eaf485770a467ca3c2a238/llama_stack/providers/tests/memory/fixtures/dummy.pdf"
-        doc = RAGDocument(
-            document_id="dummy",
-            content=URL(
-                uri=url,
-            ),
-            mime_type="application/pdf",
-            metadata={},
-        )
-        content = await content_from_doc(doc)
-        assert content in DUMMY_PDF_TEXT_CHOICES
-
     @pytest.mark.parametrize(
         "window_len, overlap_len, expected_chunks",
         [
@@ -206,15 +139,15 @@ class TestVectorStore:
         assert str(excinfo.value.__cause__) == "Cannot convert to string"
 
 
-class TestVectorDBWithIndex:
+class TestVectorStoreWithIndex:
     async def test_insert_chunks_without_embeddings(self):
-        mock_vector_db = MagicMock()
-        mock_vector_db.embedding_model = "test-model without embeddings"
+        mock_vector_store = MagicMock()
+        mock_vector_store.embedding_model = "test-model without embeddings"
         mock_index = AsyncMock()
         mock_inference_api = AsyncMock()
 
-        vector_db_with_index = VectorDBWithIndex(
-            vector_db=mock_vector_db, index=mock_index, inference_api=mock_inference_api
+        vector_store_with_index = VectorStoreWithIndex(
+            vector_store=mock_vector_store, index=mock_index, inference_api=mock_inference_api
         )
 
         chunks = [
@@ -227,7 +160,7 @@ class TestVectorDBWithIndex:
             OpenAIEmbeddingData(embedding=[0.4, 0.5, 0.6], index=1),
         ]
 
-        await vector_db_with_index.insert_chunks(chunks)
+        await vector_store_with_index.insert_chunks(chunks)
 
         # Verify openai_embeddings was called with correct params
         mock_inference_api.openai_embeddings.assert_called_once()
@@ -243,14 +176,14 @@ class TestVectorDBWithIndex:
         assert np.array_equal(args[1], np.array([[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]], dtype=np.float32))
 
     async def test_insert_chunks_with_valid_embeddings(self):
-        mock_vector_db = MagicMock()
-        mock_vector_db.embedding_model = "test-model with embeddings"
-        mock_vector_db.embedding_dimension = 3
+        mock_vector_store = MagicMock()
+        mock_vector_store.embedding_model = "test-model with embeddings"
+        mock_vector_store.embedding_dimension = 3
         mock_index = AsyncMock()
         mock_inference_api = AsyncMock()
 
-        vector_db_with_index = VectorDBWithIndex(
-            vector_db=mock_vector_db, index=mock_index, inference_api=mock_inference_api
+        vector_store_with_index = VectorStoreWithIndex(
+            vector_store=mock_vector_store, index=mock_index, inference_api=mock_inference_api
         )
 
         chunks = [
@@ -258,7 +191,7 @@ class TestVectorDBWithIndex:
             Chunk(content="Test 2", embedding=[0.4, 0.5, 0.6], metadata={}),
         ]
 
-        await vector_db_with_index.insert_chunks(chunks)
+        await vector_store_with_index.insert_chunks(chunks)
 
         mock_inference_api.openai_embeddings.assert_not_called()
         mock_index.add_chunks.assert_called_once()
@@ -267,14 +200,14 @@ class TestVectorDBWithIndex:
         assert np.array_equal(args[1], np.array([[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]], dtype=np.float32))
 
     async def test_insert_chunks_with_invalid_embeddings(self):
-        mock_vector_db = MagicMock()
-        mock_vector_db.embedding_dimension = 3
-        mock_vector_db.embedding_model = "test-model with invalid embeddings"
+        mock_vector_store = MagicMock()
+        mock_vector_store.embedding_dimension = 3
+        mock_vector_store.embedding_model = "test-model with invalid embeddings"
         mock_index = AsyncMock()
         mock_inference_api = AsyncMock()
 
-        vector_db_with_index = VectorDBWithIndex(
-            vector_db=mock_vector_db, index=mock_index, inference_api=mock_inference_api
+        vector_store_with_index = VectorStoreWithIndex(
+            vector_store=mock_vector_store, index=mock_index, inference_api=mock_inference_api
         )
 
         # Verify Chunk raises ValueError for invalid embedding type
@@ -283,7 +216,7 @@ class TestVectorDBWithIndex:
 
         # Verify Chunk raises ValueError for invalid embedding type in insert_chunks (i.e., Chunk errors before insert_chunks is called)
         with pytest.raises(ValueError, match="Input should be a valid list"):
-            await vector_db_with_index.insert_chunks(
+            await vector_store_with_index.insert_chunks(
                 [
                     Chunk(content="Test 1", embedding=None, metadata={}),
                     Chunk(content="Test 2", embedding="invalid_type", metadata={}),
@@ -292,7 +225,7 @@ class TestVectorDBWithIndex:
 
         # Verify Chunk raises ValueError for invalid embedding element type in insert_chunks (i.e., Chunk errors before insert_chunks is called)
         with pytest.raises(ValueError, match=" Input should be a valid number, unable to parse string as a number "):
-            await vector_db_with_index.insert_chunks(
+            await vector_store_with_index.insert_chunks(
                 Chunk(content="Test 1", embedding=[0.1, "string", 0.3], metadata={})
             )
 
@@ -300,20 +233,20 @@ class TestVectorDBWithIndex:
             Chunk(content="Test 1", embedding=[0.1, 0.2, 0.3, 0.4], metadata={}),
         ]
         with pytest.raises(ValueError, match="has dimension 4, expected 3"):
-            await vector_db_with_index.insert_chunks(chunks_wrong_dim)
+            await vector_store_with_index.insert_chunks(chunks_wrong_dim)
 
         mock_inference_api.openai_embeddings.assert_not_called()
        mock_index.add_chunks.assert_not_called()
 
     async def test_insert_chunks_with_partially_precomputed_embeddings(self):
-        mock_vector_db = MagicMock()
-        mock_vector_db.embedding_model = "test-model with partial embeddings"
-        mock_vector_db.embedding_dimension = 3
+        mock_vector_store = MagicMock()
+        mock_vector_store.embedding_model = "test-model with partial embeddings"
+        mock_vector_store.embedding_dimension = 3
         mock_index = AsyncMock()
         mock_inference_api = AsyncMock()
 
-        vector_db_with_index = VectorDBWithIndex(
-            vector_db=mock_vector_db, index=mock_index, inference_api=mock_inference_api
+        vector_store_with_index = VectorStoreWithIndex(
+            vector_store=mock_vector_store, index=mock_index, inference_api=mock_inference_api
         )
 
         chunks = [
@@ -327,7 +260,7 @@ class TestVectorDBWithIndex:
             OpenAIEmbeddingData(embedding=[0.3, 0.3, 0.3], index=1),
         ]
 
-        await vector_db_with_index.insert_chunks(chunks)
+        await vector_store_with_index.insert_chunks(chunks)
 
         # Verify openai_embeddings was called with correct params
         mock_inference_api.openai_embeddings.assert_called_once()
diff --git a/tests/unit/registry/test_registry.py b/tests/unit/registry/test_registry.py
index 95022ad33..d4c9786d1 100644
--- a/tests/unit/registry/test_registry.py
+++ b/tests/unit/registry/test_registry.py
@@ -8,8 +8,8 @@
 import pytest
 
 from llama_stack.apis.inference import Model
-from llama_stack.apis.vector_dbs import VectorDB
-from llama_stack.core.datatypes import VectorDBWithOwner
+from llama_stack.apis.vector_stores import VectorStore
+from llama_stack.core.datatypes import VectorStoreWithOwner
 from llama_stack.core.storage.datatypes import KVStoreReference, SqliteKVStoreConfig
 from llama_stack.core.store.registry import (
     KEY_FORMAT,
@@ -20,12 +20,12 @@ from llama_stack.providers.utils.kvstore import kvstore_impl, register_kvstore_b
 
 @pytest.fixture
-def sample_vector_db():
-    return VectorDB(
-        identifier="test_vector_db",
+def sample_vector_store():
+    return VectorStore(
+        identifier="test_vector_store",
         embedding_model="nomic-embed-text-v1.5",
         embedding_dimension=768,
-        provider_resource_id="test_vector_db",
+        provider_resource_id="test_vector_store",
         provider_id="test-provider",
     )
 
@@ -45,17 +45,17 @@ async def test_registry_initialization(disk_dist_registry):
     assert result is None
 
 
-async def test_basic_registration(disk_dist_registry, sample_vector_db, sample_model):
-    print(f"Registering {sample_vector_db}")
-    await disk_dist_registry.register(sample_vector_db)
+async def test_basic_registration(disk_dist_registry, sample_vector_store, sample_model):
+    print(f"Registering {sample_vector_store}")
+    await disk_dist_registry.register(sample_vector_store)
     print(f"Registering {sample_model}")
     await disk_dist_registry.register(sample_model)
-    print("Getting vector_db")
-    result_vector_db = await disk_dist_registry.get("vector_db", "test_vector_db")
-    assert result_vector_db is not None
-    assert result_vector_db.identifier == sample_vector_db.identifier
-    assert result_vector_db.embedding_model == sample_vector_db.embedding_model
-    assert result_vector_db.provider_id == sample_vector_db.provider_id
+    print("Getting vector_store")
+    result_vector_store = await disk_dist_registry.get("vector_store", "test_vector_store")
+    assert result_vector_store is not None
+    assert result_vector_store.identifier == sample_vector_store.identifier
+    assert result_vector_store.embedding_model == sample_vector_store.embedding_model
+    assert result_vector_store.provider_id == sample_vector_store.provider_id
 
     result_model = await disk_dist_registry.get("model", "test_model")
     assert result_model is not None
@@ -63,11 +63,11 @@ async def test_basic_registration(disk_dist_registry, sample_vector_db, sample_m
     assert result_model.provider_id == sample_model.provider_id
 
 
-async def test_cached_registry_initialization(sqlite_kvstore, sample_vector_db, sample_model):
+async def test_cached_registry_initialization(sqlite_kvstore, sample_vector_store, sample_model):
     # First populate the disk registry
     disk_registry = DiskDistributionRegistry(sqlite_kvstore)
     await disk_registry.initialize()
-    await disk_registry.register(sample_vector_db)
+    await disk_registry.register(sample_vector_store)
     await disk_registry.register(sample_model)
 
     # Test cached version loads from disk
@@ -79,29 +79,29 @@ async def test_cached_registry_initialization(sqlite_kvstore, sample_vector_db,
     )
     await cached_registry.initialize()
 
-    result_vector_db = await cached_registry.get("vector_db", "test_vector_db")
-    assert result_vector_db is not None
-    assert result_vector_db.identifier == sample_vector_db.identifier
-    assert result_vector_db.embedding_model == sample_vector_db.embedding_model
-    assert result_vector_db.embedding_dimension == sample_vector_db.embedding_dimension
-    assert result_vector_db.provider_id == sample_vector_db.provider_id
+    result_vector_store = await cached_registry.get("vector_store", "test_vector_store")
+    assert result_vector_store is not None
+    assert result_vector_store.identifier == sample_vector_store.identifier
+    assert result_vector_store.embedding_model == sample_vector_store.embedding_model
+    assert result_vector_store.embedding_dimension == sample_vector_store.embedding_dimension
+    assert result_vector_store.provider_id == sample_vector_store.provider_id
 
 
 async def test_cached_registry_updates(cached_disk_dist_registry):
-    new_vector_db = VectorDB(
-        identifier="test_vector_db_2",
+    new_vector_store = VectorStore(
+        identifier="test_vector_store_2",
         embedding_model="nomic-embed-text-v1.5",
         embedding_dimension=768,
-        provider_resource_id="test_vector_db_2",
+        provider_resource_id="test_vector_store_2",
         provider_id="baz",
     )
-    await cached_disk_dist_registry.register(new_vector_db)
+    await cached_disk_dist_registry.register(new_vector_store)
 
     # Verify in cache
-    result_vector_db = await cached_disk_dist_registry.get("vector_db", "test_vector_db_2")
-    assert result_vector_db is not None
-    assert result_vector_db.identifier == new_vector_db.identifier
-    assert result_vector_db.provider_id == new_vector_db.provider_id
+    result_vector_store = await cached_disk_dist_registry.get("vector_store", "test_vector_store_2")
+    assert result_vector_store is not None
+    assert result_vector_store.identifier == new_vector_store.identifier
+    assert result_vector_store.provider_id == new_vector_store.provider_id
 
     # Verify persisted to disk
     db_path = cached_disk_dist_registry.kvstore.db_path
@@ -111,87 +111,89 @@ async def test_cached_registry_updates(cached_disk_dist_registry):
         await kvstore_impl(KVStoreReference(backend=backend_name, namespace="registry"))
     )
     await new_registry.initialize()
-    result_vector_db = await new_registry.get("vector_db", "test_vector_db_2")
-    assert result_vector_db is not None
-    assert result_vector_db.identifier == new_vector_db.identifier
-    assert result_vector_db.provider_id == new_vector_db.provider_id
+    result_vector_store = await new_registry.get("vector_store", "test_vector_store_2")
+    assert result_vector_store is not None
+    assert result_vector_store.identifier == new_vector_store.identifier
+    assert result_vector_store.provider_id == new_vector_store.provider_id
 
 
 async def test_duplicate_provider_registration(cached_disk_dist_registry):
-    original_vector_db = VectorDB(
-        identifier="test_vector_db_2",
+    original_vector_store = VectorStore(
+        identifier="test_vector_store_2",
         embedding_model="nomic-embed-text-v1.5",
         embedding_dimension=768,
-        provider_resource_id="test_vector_db_2",
+        provider_resource_id="test_vector_store_2",
         provider_id="baz",
     )
-    assert await cached_disk_dist_registry.register(original_vector_db)
+    assert await cached_disk_dist_registry.register(original_vector_store)
 
-    duplicate_vector_db = VectorDB(
-        identifier="test_vector_db_2",
+    duplicate_vector_store = VectorStore(
+        identifier="test_vector_store_2",
         embedding_model="different-model",
         embedding_dimension=768,
-        provider_resource_id="test_vector_db_2",
+        provider_resource_id="test_vector_store_2",
         provider_id="baz",  # Same provider_id
     )
 
-    with pytest.raises(ValueError, match="Object of type 'vector_db' and identifier 'test_vector_db_2' already exists"):
-        await cached_disk_dist_registry.register(duplicate_vector_db)
+    with pytest.raises(
+        ValueError, match="Object of type 'vector_store' and identifier 'test_vector_store_2' already exists"
+    ):
+        await cached_disk_dist_registry.register(duplicate_vector_store)
 
-    result = await cached_disk_dist_registry.get("vector_db", "test_vector_db_2")
+    result = await cached_disk_dist_registry.get("vector_store", "test_vector_store_2")
     assert result is not None
-    assert result.embedding_model == original_vector_db.embedding_model  # Original values preserved
+    assert result.embedding_model == original_vector_store.embedding_model  # Original values preserved
 
 
 async def test_get_all_objects(cached_disk_dist_registry):
     # Create multiple test banks
     # Create multiple test banks
-    test_vector_dbs = [
-        VectorDB(
-            identifier=f"test_vector_db_{i}",
+    test_vector_stores = [
+        VectorStore(
+            identifier=f"test_vector_store_{i}",
             embedding_model="nomic-embed-text-v1.5",
             embedding_dimension=768,
-            provider_resource_id=f"test_vector_db_{i}",
+            provider_resource_id=f"test_vector_store_{i}",
             provider_id=f"provider_{i}",
         )
         for i in range(3)
     ]
 
-    # Register all vector_dbs
-    for vector_db in test_vector_dbs:
-        await cached_disk_dist_registry.register(vector_db)
+    # Register all vector_stores
+    for vector_store in test_vector_stores:
+        await cached_disk_dist_registry.register(vector_store)
 
     # Test get_all retrieval
     all_results = await cached_disk_dist_registry.get_all()
     assert len(all_results) == 3
 
-    # Verify each vector_db was stored correctly
-    for original_vector_db in test_vector_dbs:
-        matching_vector_dbs = [v for v in all_results if v.identifier == original_vector_db.identifier]
-        assert len(matching_vector_dbs) == 1
-        stored_vector_db = matching_vector_dbs[0]
-        assert stored_vector_db.embedding_model == original_vector_db.embedding_model
-        assert stored_vector_db.provider_id == original_vector_db.provider_id
-        assert stored_vector_db.embedding_dimension == original_vector_db.embedding_dimension
+    # Verify each vector_store was stored correctly
+    for original_vector_store in test_vector_stores:
+        matching_vector_stores = [v for v in all_results if v.identifier == original_vector_store.identifier]
+        assert len(matching_vector_stores) == 1
+        stored_vector_store = matching_vector_stores[0]
+        assert stored_vector_store.embedding_model == original_vector_store.embedding_model
+        assert stored_vector_store.provider_id == original_vector_store.provider_id
+        assert stored_vector_store.embedding_dimension == original_vector_store.embedding_dimension
 
 
 async def test_parse_registry_values_error_handling(sqlite_kvstore):
-    valid_db = VectorDB(
-        identifier="valid_vector_db",
+    valid_db = VectorStore(
+        identifier="valid_vector_store",
         embedding_model="nomic-embed-text-v1.5",
         embedding_dimension=768,
-        provider_resource_id="valid_vector_db",
+        provider_resource_id="valid_vector_store",
         provider_id="test-provider",
     )
 
     await sqlite_kvstore.set(
-        KEY_FORMAT.format(type="vector_db", identifier="valid_vector_db"), valid_db.model_dump_json()
+        KEY_FORMAT.format(type="vector_store", identifier="valid_vector_store"), valid_db.model_dump_json()
    )
 
-    await sqlite_kvstore.set(KEY_FORMAT.format(type="vector_db", identifier="corrupted_json"), "{not valid json")
+    await sqlite_kvstore.set(KEY_FORMAT.format(type="vector_store", identifier="corrupted_json"), "{not valid json")
 
     await sqlite_kvstore.set(
-        KEY_FORMAT.format(type="vector_db", identifier="missing_fields"),
-        '{"type": "vector_db", "identifier": "missing_fields"}',
+        KEY_FORMAT.format(type="vector_store", identifier="missing_fields"),
+        '{"type": "vector_store", "identifier": "missing_fields"}',
     )
 
     test_registry = DiskDistributionRegistry(sqlite_kvstore)
@@ -202,18 +204,18 @@ async def test_parse_registry_values_error_handling(sqlite_kvstore):
 
     # Should have filtered out the invalid entries
     assert len(all_objects) == 1
-    assert all_objects[0].identifier == "valid_vector_db"
+    assert all_objects[0].identifier == "valid_vector_store"
 
     # Check that the get method also handles errors correctly
-    invalid_obj = await test_registry.get("vector_db", "corrupted_json")
+    invalid_obj = await test_registry.get("vector_store", "corrupted_json")
     assert invalid_obj is None
 
-    invalid_obj = await test_registry.get("vector_db", "missing_fields")
+    invalid_obj = await test_registry.get("vector_store", "missing_fields")
     assert invalid_obj is None
 
 
 async def test_cached_registry_error_handling(sqlite_kvstore):
-    valid_db = VectorDB(
+    valid_db = VectorStore(
         identifier="valid_cached_db",
         embedding_model="nomic-embed-text-v1.5",
         embedding_dimension=768,
@@ -222,12 +224,12 @@ async def test_cached_registry_error_handling(sqlite_kvstore):
     )
 
     await sqlite_kvstore.set(
-        KEY_FORMAT.format(type="vector_db", identifier="valid_cached_db"), valid_db.model_dump_json()
+        KEY_FORMAT.format(type="vector_store", identifier="valid_cached_db"), valid_db.model_dump_json()
     )
 
     await sqlite_kvstore.set(
-        KEY_FORMAT.format(type="vector_db", identifier="invalid_cached_db"),
-        '{"type": "vector_db", "identifier": "invalid_cached_db", "embedding_model": 12345}',  # Should be string
+        KEY_FORMAT.format(type="vector_store", identifier="invalid_cached_db"),
+        '{"type": "vector_store", "identifier": "invalid_cached_db", "embedding_model": 12345}',  # Should be string
     )
 
     cached_registry = CachedDiskDistributionRegistry(sqlite_kvstore)
@@ -237,63 +239,65 @@ async def test_cached_registry_error_handling(sqlite_kvstore):
     assert len(all_objects) == 1
     assert all_objects[0].identifier == "valid_cached_db"
 
-    invalid_obj = await cached_registry.get("vector_db", "invalid_cached_db")
+    invalid_obj = await cached_registry.get("vector_store", "invalid_cached_db")
     assert invalid_obj is None
 
 
 async def test_double_registration_identical_objects(disk_dist_registry):
     """Test that registering identical objects succeeds (idempotent)."""
-    vector_db = VectorDBWithOwner(
-        identifier="test_vector_db",
+    vector_store = VectorStoreWithOwner(
+        identifier="test_vector_store",
embedding_model="all-MiniLM-L6-v2", embedding_dimension=384, - provider_resource_id="test_vector_db", + provider_resource_id="test_vector_store", provider_id="test-provider", ) # First registration should succeed - result1 = await disk_dist_registry.register(vector_db) + result1 = await disk_dist_registry.register(vector_store) assert result1 is True # Second registration of identical object should also succeed (idempotent) - result2 = await disk_dist_registry.register(vector_db) + result2 = await disk_dist_registry.register(vector_store) assert result2 is True # Verify object exists and is unchanged - retrieved = await disk_dist_registry.get("vector_db", "test_vector_db") + retrieved = await disk_dist_registry.get("vector_store", "test_vector_store") assert retrieved is not None - assert retrieved.identifier == vector_db.identifier - assert retrieved.embedding_model == vector_db.embedding_model + assert retrieved.identifier == vector_store.identifier + assert retrieved.embedding_model == vector_store.embedding_model async def test_double_registration_different_objects(disk_dist_registry): """Test that registering different objects with same identifier fails.""" - vector_db1 = VectorDBWithOwner( - identifier="test_vector_db", + vector_store1 = VectorStoreWithOwner( + identifier="test_vector_store", embedding_model="all-MiniLM-L6-v2", embedding_dimension=384, - provider_resource_id="test_vector_db", + provider_resource_id="test_vector_store", provider_id="test-provider", ) - vector_db2 = VectorDBWithOwner( - identifier="test_vector_db", # Same identifier + vector_store2 = VectorStoreWithOwner( + identifier="test_vector_store", # Same identifier embedding_model="different-model", # Different embedding model embedding_dimension=384, - provider_resource_id="test_vector_db", + provider_resource_id="test_vector_store", provider_id="test-provider", ) # First registration should succeed - result1 = await disk_dist_registry.register(vector_db1) + result1 = await disk_dist_registry.register(vector_store1) assert result1 is True # Second registration with different data should fail - with pytest.raises(ValueError, match="Object of type 'vector_db' and identifier 'test_vector_db' already exists"): - await disk_dist_registry.register(vector_db2) + with pytest.raises( + ValueError, match="Object of type 'vector_store' and identifier 'test_vector_store' already exists" + ): + await disk_dist_registry.register(vector_store2) # Verify original object is unchanged - retrieved = await disk_dist_registry.get("vector_db", "test_vector_db") + retrieved = await disk_dist_registry.get("vector_store", "test_vector_store") assert retrieved is not None assert retrieved.embedding_model == "all-MiniLM-L6-v2" # Original value diff --git a/tests/unit/server/test_server.py b/tests/unit/server/test_server.py index f21bbdd67..d6d4f4f23 100644 --- a/tests/unit/server/test_server.py +++ b/tests/unit/server/test_server.py @@ -41,7 +41,7 @@ class TestTranslateException: self.identifier = identifier self.owner = owner - resource = MockResource("vector_db", "test-db") + resource = MockResource("vector_store", "test-db") exc = AccessDeniedError("create", resource, user) result = translate_exception(exc) @@ -49,7 +49,7 @@ class TestTranslateException: assert isinstance(result, HTTPException) assert result.status_code == 403 assert "test-user" in result.detail - assert "vector_db::test-db" in result.detail + assert "vector_store::test-db" in result.detail assert "create" in result.detail assert "roles=['user']" in result.detail 
assert "teams=['dev']" in result.detail diff --git a/uv.lock b/uv.lock index 7f6e0401b..bbc917df5 100644 --- a/uv.lock +++ b/uv.lock @@ -921,16 +921,16 @@ wheels = [ [[package]] name = "fastapi" -version = "0.116.1" +version = "0.119.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "pydantic" }, { name = "starlette" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/78/d7/6c8b3bfe33eeffa208183ec037fee0cce9f7f024089ab1c5d12ef04bd27c/fastapi-0.116.1.tar.gz", hash = "sha256:ed52cbf946abfd70c5a0dccb24673f0670deeb517a88b3544d03c2a6bf283143", size = 296485, upload-time = "2025-07-11T16:22:32.057Z" } +sdist = { url = "https://files.pythonhosted.org/packages/0a/f9/5c5bcce82a7997cc0eb8c47b7800f862f6b56adc40486ed246e5010d443b/fastapi-0.119.0.tar.gz", hash = "sha256:451082403a2c1f0b99c6bd57c09110ed5463856804c8078d38e5a1f1035dbbb7", size = 336756, upload-time = "2025-10-11T17:13:40.53Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/e5/47/d63c60f59a59467fda0f93f46335c9d18526d7071f025cb5b89d5353ea42/fastapi-0.116.1-py3-none-any.whl", hash = "sha256:c46ac7c312df840f0c9e220f7964bada936781bc4e2e6eb71f1c4d7553786565", size = 95631, upload-time = "2025-07-11T16:22:30.485Z" }, + { url = "https://files.pythonhosted.org/packages/ce/70/584c4d7cad80f5e833715c0a29962d7c93b4d18eed522a02981a6d1b6ee5/fastapi-0.119.0-py3-none-any.whl", hash = "sha256:90a2e49ed19515320abb864df570dd766be0662c5d577688f1600170f7f73cf2", size = 107095, upload-time = "2025-10-11T17:13:39.048Z" }, ] [[package]] @@ -5231,7 +5231,7 @@ wheels = [ [[package]] name = "weaviate-client" -version = "4.16.9" +version = "4.17.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "authlib" }, @@ -5242,9 +5242,9 @@ dependencies = [ { name = "pydantic" }, { name = "validators" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/f4/e4/6a0b1501645f17a851067fc7bd0d5b53dc9777f2818be9c43debe06eda19/weaviate_client-4.16.9.tar.gz", hash = "sha256:d461071f1ff5ebddd0fc697959628a1d8caa12af1da071401ef25583c3084eba", size = 766390, upload-time = "2025-08-20T15:00:03.924Z" } +sdist = { url = "https://files.pythonhosted.org/packages/bd/0e/e4582b007427187a9fde55fa575db4b766c81929d2b43a3dd8becce50567/weaviate_client-4.17.0.tar.gz", hash = "sha256:731d58d84b0989df4db399b686357ed285fb95971a492ccca8dec90bb2343c51", size = 769019, upload-time = "2025-09-26T11:20:27.381Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/10/1a/fc66f5f33961351c759d56453d18176849da8f64186c941183bb574b808b/weaviate_client-4.16.9-py3-none-any.whl", hash = "sha256:8b4adabaec0d513edef94c8c1de61c89a86eba3b63a4dc1acdfc9580e80199f4", size = 579098, upload-time = "2025-08-20T15:00:01.882Z" }, + { url = "https://files.pythonhosted.org/packages/5b/c5/2da3a45866da7a935dab8ad07be05dcaee48b3ad4955144583b651929be7/weaviate_client-4.17.0-py3-none-any.whl", hash = "sha256:60e4a355b90537ee1e942ab0b76a94750897a13d9cf13c5a6decbd166d0ca8b5", size = 582763, upload-time = "2025-09-26T11:20:25.864Z" }, ] [[package]]