From ecc8a554d2f0897c5bada2ba8937dba98aaa8d12 Mon Sep 17 00:00:00 2001 From: Ashwin Bharambe Date: Sun, 12 Oct 2025 19:01:52 -0700 Subject: [PATCH] feat(api)!: support extra_body to embeddings and vector_stores APIs (#3794) Applies the same pattern from https://github.com/llamastack/llama-stack/pull/3777 to embeddings and vector_stores.create() endpoints. This should _not_ be a breaking change since (a) our tests were already using the `extra_body` parameter when passing in to the backend (b) but the backend probably wasn't extracting the parameters correctly. This PR will fix that. Updated APIs: `openai_embeddings(), openai_create_vector_store(), openai_create_vector_store_file_batch()` --- docs/static/deprecated-llama-stack-spec.html | 54 ++++------ docs/static/deprecated-llama-stack-spec.yaml | 68 ++++++------ docs/static/llama-stack-spec.html | 54 ++++------ docs/static/llama-stack-spec.yaml | 68 ++++++------ docs/static/stainless-llama-stack-spec.html | 54 ++++------ docs/static/stainless-llama-stack-spec.yaml | 68 ++++++------ llama_stack/apis/inference/inference.py | 31 ++++-- llama_stack/apis/vector_io/vector_io.py | 61 +++++++---- llama_stack/core/library_client.py | 14 +++ llama_stack/core/routers/inference.py | 23 ++-- llama_stack/core/routers/vector_io.py | 101 ++++++++++-------- .../inline/batches/reference/batches.py | 5 +- .../remote/inference/bedrock/bedrock.py | 7 +- .../remote/inference/cerebras/cerebras.py | 11 +- .../inference/llama_openai_compat/llama.py | 7 +- .../remote/inference/nvidia/nvidia.py | 17 ++- .../inference/passthrough/passthrough.py | 7 +- .../providers/remote/inference/tgi/tgi.py | 11 +- .../remote/inference/together/together.py | 23 ++-- .../utils/inference/embedding_mixin.py | 15 ++- .../utils/inference/litellm_openai_mixin.py | 15 ++- .../providers/utils/inference/openai_mixin.py | 31 +++--- .../utils/memory/openai_vector_store_mixin.py | 49 ++++----- .../providers/utils/memory/vector_store.py | 14 ++- .../test_vector_io_openai_vector_stores.py | 42 +++----- tests/unit/rag/test_vector_store.py | 27 +++-- 26 files changed, 451 insertions(+), 426 deletions(-) diff --git a/docs/static/deprecated-llama-stack-spec.html b/docs/static/deprecated-llama-stack-spec.html index 8c4c80014..46417522c 100644 --- a/docs/static/deprecated-llama-stack-spec.html +++ b/docs/static/deprecated-llama-stack-spec.html @@ -1662,7 +1662,7 @@ "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/OpenaiEmbeddingsRequest" + "$ref": "#/components/schemas/OpenAIEmbeddingsRequestWithExtraBody" } } }, @@ -2436,13 +2436,13 @@ "VectorIO" ], "summary": "Creates a vector store.", - "description": "Creates a vector store.", + "description": "Creates a vector store.\nGenerate an OpenAI-compatible vector store with the given parameters.", "parameters": [], "requestBody": { "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/OpenaiCreateVectorStoreRequest" + "$ref": "#/components/schemas/OpenAICreateVectorStoreRequestWithExtraBody" } } }, @@ -2622,7 +2622,7 @@ "VectorIO" ], "summary": "Create a vector store file batch.", - "description": "Create a vector store file batch.", + "description": "Create a vector store file batch.\nGenerate an OpenAI-compatible vector store file batch for the given vector store.", "parameters": [ { "name": "vector_store_id", @@ -2638,7 +2638,7 @@ "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/OpenaiCreateVectorStoreFileBatchRequest" + "$ref": 
"#/components/schemas/OpenAICreateVectorStoreFileBatchRequestWithExtraBody" } } }, @@ -8174,7 +8174,7 @@ "title": "OpenAICompletionChoice", "description": "A choice from an OpenAI-compatible completion response." }, - "OpenaiEmbeddingsRequest": { + "OpenAIEmbeddingsRequestWithExtraBody": { "type": "object", "properties": { "model": { @@ -8197,6 +8197,7 @@ }, "encoding_format": { "type": "string", + "default": "float", "description": "(Optional) The format to return the embeddings in. Can be either \"float\" or \"base64\". Defaults to \"float\"." }, "dimensions": { @@ -8213,7 +8214,8 @@ "model", "input" ], - "title": "OpenaiEmbeddingsRequest" + "title": "OpenAIEmbeddingsRequestWithExtraBody", + "description": "Request parameters for OpenAI-compatible embeddings endpoint." }, "OpenAIEmbeddingData": { "type": "object", @@ -12061,19 +12063,19 @@ "title": "VectorStoreObject", "description": "OpenAI Vector Store object." }, - "OpenaiCreateVectorStoreRequest": { + "OpenAICreateVectorStoreRequestWithExtraBody": { "type": "object", "properties": { "name": { "type": "string", - "description": "A name for the vector store." + "description": "(Optional) A name for the vector store" }, "file_ids": { "type": "array", "items": { "type": "string" }, - "description": "A list of File IDs that the vector store should use. Useful for tools like `file_search` that can access files." + "description": "List of file IDs to include in the vector store" }, "expires_after": { "type": "object", @@ -12099,7 +12101,7 @@ } ] }, - "description": "The expiration policy for a vector store." + "description": "(Optional) Expiration policy for the vector store" }, "chunking_strategy": { "type": "object", @@ -12125,7 +12127,7 @@ } ] }, - "description": "The chunking strategy used to chunk the file(s). If not set, will use the `auto` strategy." + "description": "(Optional) Strategy for splitting files into chunks" }, "metadata": { "type": "object", @@ -12151,23 +12153,12 @@ } ] }, - "description": "Set of 16 key-value pairs that can be attached to an object." - }, - "embedding_model": { - "type": "string", - "description": "The embedding model to use for this vector store." - }, - "embedding_dimension": { - "type": "integer", - "description": "The dimension of the embedding vectors (default: 384)." - }, - "provider_id": { - "type": "string", - "description": "The ID of the provider to use for this vector store." + "description": "Set of key-value pairs that can be attached to the vector store" } }, "additionalProperties": false, - "title": "OpenaiCreateVectorStoreRequest" + "title": "OpenAICreateVectorStoreRequestWithExtraBody", + "description": "Request to create a vector store with extra_body support." }, "OpenaiUpdateVectorStoreRequest": { "type": "object", @@ -12337,7 +12328,7 @@ "title": "VectorStoreChunkingStrategyStaticConfig", "description": "Configuration for static chunking strategy." }, - "OpenaiCreateVectorStoreFileBatchRequest": { + "OpenAICreateVectorStoreFileBatchRequestWithExtraBody": { "type": "object", "properties": { "file_ids": { @@ -12345,7 +12336,7 @@ "items": { "type": "string" }, - "description": "A list of File IDs that the vector store should use." + "description": "A list of File IDs that the vector store should use" }, "attributes": { "type": "object", @@ -12371,18 +12362,19 @@ } ] }, - "description": "(Optional) Key-value attributes to store with the files." 
+ "description": "(Optional) Key-value attributes to store with the files" }, "chunking_strategy": { "$ref": "#/components/schemas/VectorStoreChunkingStrategy", - "description": "(Optional) The chunking strategy used to chunk the file(s). Defaults to auto." + "description": "(Optional) The chunking strategy used to chunk the file(s). Defaults to auto" } }, "additionalProperties": false, "required": [ "file_ids" ], - "title": "OpenaiCreateVectorStoreFileBatchRequest" + "title": "OpenAICreateVectorStoreFileBatchRequestWithExtraBody", + "description": "Request to create a vector store file batch with extra_body support." }, "VectorStoreFileBatchObject": { "type": "object", diff --git a/docs/static/deprecated-llama-stack-spec.yaml b/docs/static/deprecated-llama-stack-spec.yaml index e8cc035da..ffdfd8bc7 100644 --- a/docs/static/deprecated-llama-stack-spec.yaml +++ b/docs/static/deprecated-llama-stack-spec.yaml @@ -1203,7 +1203,7 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/OpenaiEmbeddingsRequest' + $ref: '#/components/schemas/OpenAIEmbeddingsRequestWithExtraBody' required: true deprecated: true /v1/openai/v1/files: @@ -1792,13 +1792,16 @@ paths: tags: - VectorIO summary: Creates a vector store. - description: Creates a vector store. + description: >- + Creates a vector store. + + Generate an OpenAI-compatible vector store with the given parameters. parameters: [] requestBody: content: application/json: schema: - $ref: '#/components/schemas/OpenaiCreateVectorStoreRequest' + $ref: '#/components/schemas/OpenAICreateVectorStoreRequestWithExtraBody' required: true deprecated: true /v1/openai/v1/vector_stores/{vector_store_id}: @@ -1924,7 +1927,11 @@ paths: tags: - VectorIO summary: Create a vector store file batch. - description: Create a vector store file batch. + description: >- + Create a vector store file batch. + + Generate an OpenAI-compatible vector store file batch for the given vector + store. parameters: - name: vector_store_id in: path @@ -1937,7 +1944,7 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/OpenaiCreateVectorStoreFileBatchRequest' + $ref: '#/components/schemas/OpenAICreateVectorStoreFileBatchRequestWithExtraBody' required: true deprecated: true /v1/openai/v1/vector_stores/{vector_store_id}/file_batches/{batch_id}: @@ -6035,7 +6042,7 @@ components: title: OpenAICompletionChoice description: >- A choice from an OpenAI-compatible completion response. - OpenaiEmbeddingsRequest: + OpenAIEmbeddingsRequestWithExtraBody: type: object properties: model: @@ -6054,6 +6061,7 @@ components: multiple inputs in a single request, pass an array of strings. encoding_format: type: string + default: float description: >- (Optional) The format to return the embeddings in. Can be either "float" or "base64". Defaults to "float". @@ -6071,7 +6079,9 @@ components: required: - model - input - title: OpenaiEmbeddingsRequest + title: OpenAIEmbeddingsRequestWithExtraBody + description: >- + Request parameters for OpenAI-compatible embeddings endpoint. OpenAIEmbeddingData: type: object properties: @@ -9147,19 +9157,18 @@ components: - metadata title: VectorStoreObject description: OpenAI Vector Store object. - OpenaiCreateVectorStoreRequest: + "OpenAICreateVectorStoreRequestWithExtraBody": type: object properties: name: type: string - description: A name for the vector store. + description: (Optional) A name for the vector store file_ids: type: array items: type: string description: >- - A list of File IDs that the vector store should use. 
Useful for tools - like `file_search` that can access files. + List of file IDs to include in the vector store expires_after: type: object additionalProperties: @@ -9171,7 +9180,7 @@ components: - type: array - type: object description: >- - The expiration policy for a vector store. + (Optional) Expiration policy for the vector store chunking_strategy: type: object additionalProperties: @@ -9183,8 +9192,7 @@ components: - type: array - type: object description: >- - The chunking strategy used to chunk the file(s). If not set, will use - the `auto` strategy. + (Optional) Strategy for splitting files into chunks metadata: type: object additionalProperties: @@ -9196,21 +9204,12 @@ components: - type: array - type: object description: >- - Set of 16 key-value pairs that can be attached to an object. - embedding_model: - type: string - description: >- - The embedding model to use for this vector store. - embedding_dimension: - type: integer - description: >- - The dimension of the embedding vectors (default: 384). - provider_id: - type: string - description: >- - The ID of the provider to use for this vector store. + Set of key-value pairs that can be attached to the vector store additionalProperties: false - title: OpenaiCreateVectorStoreRequest + title: >- + OpenAICreateVectorStoreRequestWithExtraBody + description: >- + Request to create a vector store with extra_body support. OpenaiUpdateVectorStoreRequest: type: object properties: @@ -9331,7 +9330,7 @@ components: title: VectorStoreChunkingStrategyStaticConfig description: >- Configuration for static chunking strategy. - OpenaiCreateVectorStoreFileBatchRequest: + "OpenAICreateVectorStoreFileBatchRequestWithExtraBody": type: object properties: file_ids: @@ -9339,7 +9338,7 @@ components: items: type: string description: >- - A list of File IDs that the vector store should use. + A list of File IDs that the vector store should use attributes: type: object additionalProperties: @@ -9351,16 +9350,19 @@ components: - type: array - type: object description: >- - (Optional) Key-value attributes to store with the files. + (Optional) Key-value attributes to store with the files chunking_strategy: $ref: '#/components/schemas/VectorStoreChunkingStrategy' description: >- (Optional) The chunking strategy used to chunk the file(s). Defaults to - auto. + auto additionalProperties: false required: - file_ids - title: OpenaiCreateVectorStoreFileBatchRequest + title: >- + OpenAICreateVectorStoreFileBatchRequestWithExtraBody + description: >- + Request to create a vector store file batch with extra_body support. 
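For context, a minimal client-side sketch of the pattern these `...WithExtraBody` schemas enable: standard OpenAI fields go in the request body as usual, while llama-stack-specific parameters ride along in `extra_body` and are recovered server-side from `model_extra`. This assumes a recent `openai` Python client (where `vector_stores` is top-level) pointed at a Llama Stack server; the base URL, API key, embedding model name, and provider ID below are illustrative placeholders, not values taken from this patch.

from openai import OpenAI

client = OpenAI(base_url="http://localhost:8321/v1", api_key="none")

# "name" is a standard OpenAI field; the extra_body keys are the
# llama-stack-specific parameters this PR extracts on the server.
vector_store = client.vector_stores.create(
    name="my-docs",
    extra_body={
        "embedding_model": "all-MiniLM-L6-v2",  # hypothetical model id
        "embedding_dimension": 384,
        "provider_id": "faiss",                 # hypothetical provider id
    },
)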
VectorStoreFileBatchObject: type: object properties: diff --git a/docs/static/llama-stack-spec.html b/docs/static/llama-stack-spec.html index bdfd606df..24e88b5f6 100644 --- a/docs/static/llama-stack-spec.html +++ b/docs/static/llama-stack-spec.html @@ -765,7 +765,7 @@ "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/OpenaiEmbeddingsRequest" + "$ref": "#/components/schemas/OpenAIEmbeddingsRequestWithExtraBody" } } }, @@ -3170,13 +3170,13 @@ "VectorIO" ], "summary": "Creates a vector store.", - "description": "Creates a vector store.", + "description": "Creates a vector store.\nGenerate an OpenAI-compatible vector store with the given parameters.", "parameters": [], "requestBody": { "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/OpenaiCreateVectorStoreRequest" + "$ref": "#/components/schemas/OpenAICreateVectorStoreRequestWithExtraBody" } } }, @@ -3356,7 +3356,7 @@ "VectorIO" ], "summary": "Create a vector store file batch.", - "description": "Create a vector store file batch.", + "description": "Create a vector store file batch.\nGenerate an OpenAI-compatible vector store file batch for the given vector store.", "parameters": [ { "name": "vector_store_id", @@ -3372,7 +3372,7 @@ "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/OpenaiCreateVectorStoreFileBatchRequest" + "$ref": "#/components/schemas/OpenAICreateVectorStoreFileBatchRequestWithExtraBody" } } }, @@ -6324,7 +6324,7 @@ "title": "ConversationItemDeletedResource", "description": "Response for deleted conversation item." }, - "OpenaiEmbeddingsRequest": { + "OpenAIEmbeddingsRequestWithExtraBody": { "type": "object", "properties": { "model": { @@ -6347,6 +6347,7 @@ }, "encoding_format": { "type": "string", + "default": "float", "description": "(Optional) The format to return the embeddings in. Can be either \"float\" or \"base64\". Defaults to \"float\"." }, "dimensions": { @@ -6363,7 +6364,8 @@ "model", "input" ], - "title": "OpenaiEmbeddingsRequest" + "title": "OpenAIEmbeddingsRequestWithExtraBody", + "description": "Request parameters for OpenAI-compatible embeddings endpoint." }, "OpenAIEmbeddingData": { "type": "object", @@ -12587,19 +12589,19 @@ "title": "VectorStoreObject", "description": "OpenAI Vector Store object." }, - "OpenaiCreateVectorStoreRequest": { + "OpenAICreateVectorStoreRequestWithExtraBody": { "type": "object", "properties": { "name": { "type": "string", - "description": "A name for the vector store." + "description": "(Optional) A name for the vector store" }, "file_ids": { "type": "array", "items": { "type": "string" }, - "description": "A list of File IDs that the vector store should use. Useful for tools like `file_search` that can access files." + "description": "List of file IDs to include in the vector store" }, "expires_after": { "type": "object", @@ -12625,7 +12627,7 @@ } ] }, - "description": "The expiration policy for a vector store." + "description": "(Optional) Expiration policy for the vector store" }, "chunking_strategy": { "type": "object", @@ -12651,7 +12653,7 @@ } ] }, - "description": "The chunking strategy used to chunk the file(s). If not set, will use the `auto` strategy." + "description": "(Optional) Strategy for splitting files into chunks" }, "metadata": { "type": "object", @@ -12677,23 +12679,12 @@ } ] }, - "description": "Set of 16 key-value pairs that can be attached to an object." - }, - "embedding_model": { - "type": "string", - "description": "The embedding model to use for this vector store." 
- }, - "embedding_dimension": { - "type": "integer", - "description": "The dimension of the embedding vectors (default: 384)." - }, - "provider_id": { - "type": "string", - "description": "The ID of the provider to use for this vector store." + "description": "Set of key-value pairs that can be attached to the vector store" } }, "additionalProperties": false, - "title": "OpenaiCreateVectorStoreRequest" + "title": "OpenAICreateVectorStoreRequestWithExtraBody", + "description": "Request to create a vector store with extra_body support." }, "OpenaiUpdateVectorStoreRequest": { "type": "object", @@ -12863,7 +12854,7 @@ "title": "VectorStoreChunkingStrategyStaticConfig", "description": "Configuration for static chunking strategy." }, - "OpenaiCreateVectorStoreFileBatchRequest": { + "OpenAICreateVectorStoreFileBatchRequestWithExtraBody": { "type": "object", "properties": { "file_ids": { @@ -12871,7 +12862,7 @@ "items": { "type": "string" }, - "description": "A list of File IDs that the vector store should use." + "description": "A list of File IDs that the vector store should use" }, "attributes": { "type": "object", @@ -12897,18 +12888,19 @@ } ] }, - "description": "(Optional) Key-value attributes to store with the files." + "description": "(Optional) Key-value attributes to store with the files" }, "chunking_strategy": { "$ref": "#/components/schemas/VectorStoreChunkingStrategy", - "description": "(Optional) The chunking strategy used to chunk the file(s). Defaults to auto." + "description": "(Optional) The chunking strategy used to chunk the file(s). Defaults to auto" } }, "additionalProperties": false, "required": [ "file_ids" ], - "title": "OpenaiCreateVectorStoreFileBatchRequest" + "title": "OpenAICreateVectorStoreFileBatchRequestWithExtraBody", + "description": "Request to create a vector store file batch with extra_body support." }, "VectorStoreFileBatchObject": { "type": "object", diff --git a/docs/static/llama-stack-spec.yaml b/docs/static/llama-stack-spec.yaml index 7b05849dd..ac1641079 100644 --- a/docs/static/llama-stack-spec.yaml +++ b/docs/static/llama-stack-spec.yaml @@ -617,7 +617,7 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/OpenaiEmbeddingsRequest' + $ref: '#/components/schemas/OpenAIEmbeddingsRequestWithExtraBody' required: true deprecated: false /v1/files: @@ -2413,13 +2413,16 @@ paths: tags: - VectorIO summary: Creates a vector store. - description: Creates a vector store. + description: >- + Creates a vector store. + + Generate an OpenAI-compatible vector store with the given parameters. parameters: [] requestBody: content: application/json: schema: - $ref: '#/components/schemas/OpenaiCreateVectorStoreRequest' + $ref: '#/components/schemas/OpenAICreateVectorStoreRequestWithExtraBody' required: true deprecated: false /v1/vector_stores/{vector_store_id}: @@ -2545,7 +2548,11 @@ paths: tags: - VectorIO summary: Create a vector store file batch. - description: Create a vector store file batch. + description: >- + Create a vector store file batch. + + Generate an OpenAI-compatible vector store file batch for the given vector + store. 
parameters: - name: vector_store_id in: path @@ -2558,7 +2565,7 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/OpenaiCreateVectorStoreFileBatchRequest' + $ref: '#/components/schemas/OpenAICreateVectorStoreFileBatchRequestWithExtraBody' required: true deprecated: false /v1/vector_stores/{vector_store_id}/file_batches/{batch_id}: @@ -4797,7 +4804,7 @@ components: - deleted title: ConversationItemDeletedResource description: Response for deleted conversation item. - OpenaiEmbeddingsRequest: + OpenAIEmbeddingsRequestWithExtraBody: type: object properties: model: @@ -4816,6 +4823,7 @@ components: multiple inputs in a single request, pass an array of strings. encoding_format: type: string + default: float description: >- (Optional) The format to return the embeddings in. Can be either "float" or "base64". Defaults to "float". @@ -4833,7 +4841,9 @@ components: required: - model - input - title: OpenaiEmbeddingsRequest + title: OpenAIEmbeddingsRequestWithExtraBody + description: >- + Request parameters for OpenAI-compatible embeddings endpoint. OpenAIEmbeddingData: type: object properties: @@ -9612,19 +9622,18 @@ components: - metadata title: VectorStoreObject description: OpenAI Vector Store object. - OpenaiCreateVectorStoreRequest: + "OpenAICreateVectorStoreRequestWithExtraBody": type: object properties: name: type: string - description: A name for the vector store. + description: (Optional) A name for the vector store file_ids: type: array items: type: string description: >- - A list of File IDs that the vector store should use. Useful for tools - like `file_search` that can access files. + List of file IDs to include in the vector store expires_after: type: object additionalProperties: @@ -9636,7 +9645,7 @@ components: - type: array - type: object description: >- - The expiration policy for a vector store. + (Optional) Expiration policy for the vector store chunking_strategy: type: object additionalProperties: @@ -9648,8 +9657,7 @@ components: - type: array - type: object description: >- - The chunking strategy used to chunk the file(s). If not set, will use - the `auto` strategy. + (Optional) Strategy for splitting files into chunks metadata: type: object additionalProperties: @@ -9661,21 +9669,12 @@ components: - type: array - type: object description: >- - Set of 16 key-value pairs that can be attached to an object. - embedding_model: - type: string - description: >- - The embedding model to use for this vector store. - embedding_dimension: - type: integer - description: >- - The dimension of the embedding vectors (default: 384). - provider_id: - type: string - description: >- - The ID of the provider to use for this vector store. + Set of key-value pairs that can be attached to the vector store additionalProperties: false - title: OpenaiCreateVectorStoreRequest + title: >- + OpenAICreateVectorStoreRequestWithExtraBody + description: >- + Request to create a vector store with extra_body support. OpenaiUpdateVectorStoreRequest: type: object properties: @@ -9796,7 +9795,7 @@ components: title: VectorStoreChunkingStrategyStaticConfig description: >- Configuration for static chunking strategy. - OpenaiCreateVectorStoreFileBatchRequest: + "OpenAICreateVectorStoreFileBatchRequestWithExtraBody": type: object properties: file_ids: @@ -9804,7 +9803,7 @@ components: items: type: string description: >- - A list of File IDs that the vector store should use. 
+ A list of File IDs that the vector store should use attributes: type: object additionalProperties: @@ -9816,16 +9815,19 @@ components: - type: array - type: object description: >- - (Optional) Key-value attributes to store with the files. + (Optional) Key-value attributes to store with the files chunking_strategy: $ref: '#/components/schemas/VectorStoreChunkingStrategy' description: >- (Optional) The chunking strategy used to chunk the file(s). Defaults to - auto. + auto additionalProperties: false required: - file_ids - title: OpenaiCreateVectorStoreFileBatchRequest + title: >- + OpenAICreateVectorStoreFileBatchRequestWithExtraBody + description: >- + Request to create a vector store file batch with extra_body support. VectorStoreFileBatchObject: type: object properties: diff --git a/docs/static/stainless-llama-stack-spec.html b/docs/static/stainless-llama-stack-spec.html index 7e0aaa2e9..4184f1379 100644 --- a/docs/static/stainless-llama-stack-spec.html +++ b/docs/static/stainless-llama-stack-spec.html @@ -765,7 +765,7 @@ "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/OpenaiEmbeddingsRequest" + "$ref": "#/components/schemas/OpenAIEmbeddingsRequestWithExtraBody" } } }, @@ -3170,13 +3170,13 @@ "VectorIO" ], "summary": "Creates a vector store.", - "description": "Creates a vector store.", + "description": "Creates a vector store.\nGenerate an OpenAI-compatible vector store with the given parameters.", "parameters": [], "requestBody": { "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/OpenaiCreateVectorStoreRequest" + "$ref": "#/components/schemas/OpenAICreateVectorStoreRequestWithExtraBody" } } }, @@ -3356,7 +3356,7 @@ "VectorIO" ], "summary": "Create a vector store file batch.", - "description": "Create a vector store file batch.", + "description": "Create a vector store file batch.\nGenerate an OpenAI-compatible vector store file batch for the given vector store.", "parameters": [ { "name": "vector_store_id", @@ -3372,7 +3372,7 @@ "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/OpenaiCreateVectorStoreFileBatchRequest" + "$ref": "#/components/schemas/OpenAICreateVectorStoreFileBatchRequestWithExtraBody" } } }, @@ -8333,7 +8333,7 @@ "title": "ConversationItemDeletedResource", "description": "Response for deleted conversation item." }, - "OpenaiEmbeddingsRequest": { + "OpenAIEmbeddingsRequestWithExtraBody": { "type": "object", "properties": { "model": { @@ -8356,6 +8356,7 @@ }, "encoding_format": { "type": "string", + "default": "float", "description": "(Optional) The format to return the embeddings in. Can be either \"float\" or \"base64\". Defaults to \"float\"." }, "dimensions": { @@ -8372,7 +8373,8 @@ "model", "input" ], - "title": "OpenaiEmbeddingsRequest" + "title": "OpenAIEmbeddingsRequestWithExtraBody", + "description": "Request parameters for OpenAI-compatible embeddings endpoint." }, "OpenAIEmbeddingData": { "type": "object", @@ -14596,19 +14598,19 @@ "title": "VectorStoreObject", "description": "OpenAI Vector Store object." }, - "OpenaiCreateVectorStoreRequest": { + "OpenAICreateVectorStoreRequestWithExtraBody": { "type": "object", "properties": { "name": { "type": "string", - "description": "A name for the vector store." + "description": "(Optional) A name for the vector store" }, "file_ids": { "type": "array", "items": { "type": "string" }, - "description": "A list of File IDs that the vector store should use. Useful for tools like `file_search` that can access files." 
+ "description": "List of file IDs to include in the vector store" }, "expires_after": { "type": "object", @@ -14634,7 +14636,7 @@ } ] }, - "description": "The expiration policy for a vector store." + "description": "(Optional) Expiration policy for the vector store" }, "chunking_strategy": { "type": "object", @@ -14660,7 +14662,7 @@ } ] }, - "description": "The chunking strategy used to chunk the file(s). If not set, will use the `auto` strategy." + "description": "(Optional) Strategy for splitting files into chunks" }, "metadata": { "type": "object", @@ -14686,23 +14688,12 @@ } ] }, - "description": "Set of 16 key-value pairs that can be attached to an object." - }, - "embedding_model": { - "type": "string", - "description": "The embedding model to use for this vector store." - }, - "embedding_dimension": { - "type": "integer", - "description": "The dimension of the embedding vectors (default: 384)." - }, - "provider_id": { - "type": "string", - "description": "The ID of the provider to use for this vector store." + "description": "Set of key-value pairs that can be attached to the vector store" } }, "additionalProperties": false, - "title": "OpenaiCreateVectorStoreRequest" + "title": "OpenAICreateVectorStoreRequestWithExtraBody", + "description": "Request to create a vector store with extra_body support." }, "OpenaiUpdateVectorStoreRequest": { "type": "object", @@ -14872,7 +14863,7 @@ "title": "VectorStoreChunkingStrategyStaticConfig", "description": "Configuration for static chunking strategy." }, - "OpenaiCreateVectorStoreFileBatchRequest": { + "OpenAICreateVectorStoreFileBatchRequestWithExtraBody": { "type": "object", "properties": { "file_ids": { @@ -14880,7 +14871,7 @@ "items": { "type": "string" }, - "description": "A list of File IDs that the vector store should use." + "description": "A list of File IDs that the vector store should use" }, "attributes": { "type": "object", @@ -14906,18 +14897,19 @@ } ] }, - "description": "(Optional) Key-value attributes to store with the files." + "description": "(Optional) Key-value attributes to store with the files" }, "chunking_strategy": { "$ref": "#/components/schemas/VectorStoreChunkingStrategy", - "description": "(Optional) The chunking strategy used to chunk the file(s). Defaults to auto." + "description": "(Optional) The chunking strategy used to chunk the file(s). Defaults to auto" } }, "additionalProperties": false, "required": [ "file_ids" ], - "title": "OpenaiCreateVectorStoreFileBatchRequest" + "title": "OpenAICreateVectorStoreFileBatchRequestWithExtraBody", + "description": "Request to create a vector store file batch with extra_body support." }, "VectorStoreFileBatchObject": { "type": "object", diff --git a/docs/static/stainless-llama-stack-spec.yaml b/docs/static/stainless-llama-stack-spec.yaml index 9f462d61a..b01779abb 100644 --- a/docs/static/stainless-llama-stack-spec.yaml +++ b/docs/static/stainless-llama-stack-spec.yaml @@ -620,7 +620,7 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/OpenaiEmbeddingsRequest' + $ref: '#/components/schemas/OpenAIEmbeddingsRequestWithExtraBody' required: true deprecated: false /v1/files: @@ -2416,13 +2416,16 @@ paths: tags: - VectorIO summary: Creates a vector store. - description: Creates a vector store. + description: >- + Creates a vector store. + + Generate an OpenAI-compatible vector store with the given parameters. 
parameters: [] requestBody: content: application/json: schema: - $ref: '#/components/schemas/OpenaiCreateVectorStoreRequest' + $ref: '#/components/schemas/OpenAICreateVectorStoreRequestWithExtraBody' required: true deprecated: false /v1/vector_stores/{vector_store_id}: @@ -2548,7 +2551,11 @@ paths: tags: - VectorIO summary: Create a vector store file batch. - description: Create a vector store file batch. + description: >- + Create a vector store file batch. + + Generate an OpenAI-compatible vector store file batch for the given vector + store. parameters: - name: vector_store_id in: path @@ -2561,7 +2568,7 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/OpenaiCreateVectorStoreFileBatchRequest' + $ref: '#/components/schemas/OpenAICreateVectorStoreFileBatchRequestWithExtraBody' required: true deprecated: false /v1/vector_stores/{vector_store_id}/file_batches/{batch_id}: @@ -6242,7 +6249,7 @@ components: - deleted title: ConversationItemDeletedResource description: Response for deleted conversation item. - OpenaiEmbeddingsRequest: + OpenAIEmbeddingsRequestWithExtraBody: type: object properties: model: @@ -6261,6 +6268,7 @@ components: multiple inputs in a single request, pass an array of strings. encoding_format: type: string + default: float description: >- (Optional) The format to return the embeddings in. Can be either "float" or "base64". Defaults to "float". @@ -6278,7 +6286,9 @@ components: required: - model - input - title: OpenaiEmbeddingsRequest + title: OpenAIEmbeddingsRequestWithExtraBody + description: >- + Request parameters for OpenAI-compatible embeddings endpoint. OpenAIEmbeddingData: type: object properties: @@ -11057,19 +11067,18 @@ components: - metadata title: VectorStoreObject description: OpenAI Vector Store object. - OpenaiCreateVectorStoreRequest: + "OpenAICreateVectorStoreRequestWithExtraBody": type: object properties: name: type: string - description: A name for the vector store. + description: (Optional) A name for the vector store file_ids: type: array items: type: string description: >- - A list of File IDs that the vector store should use. Useful for tools - like `file_search` that can access files. + List of file IDs to include in the vector store expires_after: type: object additionalProperties: @@ -11081,7 +11090,7 @@ components: - type: array - type: object description: >- - The expiration policy for a vector store. + (Optional) Expiration policy for the vector store chunking_strategy: type: object additionalProperties: @@ -11093,8 +11102,7 @@ components: - type: array - type: object description: >- - The chunking strategy used to chunk the file(s). If not set, will use - the `auto` strategy. + (Optional) Strategy for splitting files into chunks metadata: type: object additionalProperties: @@ -11106,21 +11114,12 @@ components: - type: array - type: object description: >- - Set of 16 key-value pairs that can be attached to an object. - embedding_model: - type: string - description: >- - The embedding model to use for this vector store. - embedding_dimension: - type: integer - description: >- - The dimension of the embedding vectors (default: 384). - provider_id: - type: string - description: >- - The ID of the provider to use for this vector store. + Set of key-value pairs that can be attached to the vector store additionalProperties: false - title: OpenaiCreateVectorStoreRequest + title: >- + OpenAICreateVectorStoreRequestWithExtraBody + description: >- + Request to create a vector store with extra_body support. 
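The embeddings endpoint follows the same pattern. A hedged sketch of a client call, again with the stock `openai` client against a Llama Stack server; the model identifier and the `input_type` key are illustrative (the NVIDIA adapter changed below does forward `extra_body` to NIM, but the exact provider-side keys depend on the backend):

from openai import OpenAI

client = OpenAI(base_url="http://localhost:8321/v1", api_key="none")

response = client.embeddings.create(
    model="nvidia/nv-embedqa-e5-v5",       # hypothetical embedding model id
    input=["first chunk", "second chunk"],
    # Provider-specific knobs travel in extra_body and land in
    # params.model_extra on OpenAIEmbeddingsRequestWithExtraBody.
    extra_body={"input_type": "query"},    # hypothetical NIM-style key
)
print(len(response.data))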
OpenaiUpdateVectorStoreRequest: type: object properties: @@ -11241,7 +11240,7 @@ components: title: VectorStoreChunkingStrategyStaticConfig description: >- Configuration for static chunking strategy. - OpenaiCreateVectorStoreFileBatchRequest: + "OpenAICreateVectorStoreFileBatchRequestWithExtraBody": type: object properties: file_ids: @@ -11249,7 +11248,7 @@ components: items: type: string description: >- - A list of File IDs that the vector store should use. + A list of File IDs that the vector store should use attributes: type: object additionalProperties: @@ -11261,16 +11260,19 @@ components: - type: array - type: object description: >- - (Optional) Key-value attributes to store with the files. + (Optional) Key-value attributes to store with the files chunking_strategy: $ref: '#/components/schemas/VectorStoreChunkingStrategy' description: >- (Optional) The chunking strategy used to chunk the file(s). Defaults to - auto. + auto additionalProperties: false required: - file_ids - title: OpenaiCreateVectorStoreFileBatchRequest + title: >- + OpenAICreateVectorStoreFileBatchRequestWithExtraBody + description: >- + Request to create a vector store file batch with extra_body support. VectorStoreFileBatchObject: type: object properties: diff --git a/llama_stack/apis/inference/inference.py b/llama_stack/apis/inference/inference.py index 3c1aa1f63..027246470 100644 --- a/llama_stack/apis/inference/inference.py +++ b/llama_stack/apis/inference/inference.py @@ -1140,6 +1140,25 @@ class OpenAIChatCompletionRequestWithExtraBody(BaseModel, extra="allow"): user: str | None = None +# extra_body can be accessed via .model_extra +@json_schema_type +class OpenAIEmbeddingsRequestWithExtraBody(BaseModel, extra="allow"): + """Request parameters for OpenAI-compatible embeddings endpoint. + + :param model: The identifier of the model to use. The model must be an embedding model registered with Llama Stack and available via the /models endpoint. + :param input: Input text to embed, encoded as a string or array of strings. To embed multiple inputs in a single request, pass an array of strings. + :param encoding_format: (Optional) The format to return the embeddings in. Can be either "float" or "base64". Defaults to "float". + :param dimensions: (Optional) The number of dimensions the resulting output embeddings should have. Only supported in text-embedding-3 and later models. + :param user: (Optional) A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. + """ + + model: str + input: str | list[str] + encoding_format: str | None = "float" + dimensions: int | None = None + user: str | None = None + + @runtime_checkable @trace_protocol class InferenceProvider(Protocol): @@ -1200,21 +1219,11 @@ class InferenceProvider(Protocol): @webmethod(route="/embeddings", method="POST", level=LLAMA_STACK_API_V1) async def openai_embeddings( self, - model: str, - input: str | list[str], - encoding_format: str | None = "float", - dimensions: int | None = None, - user: str | None = None, + params: Annotated[OpenAIEmbeddingsRequestWithExtraBody, Body(...)], ) -> OpenAIEmbeddingsResponse: """Create embeddings. Generate OpenAI-compatible embeddings for the given input using the specified model. - - :param model: The identifier of the model to use. The model must be an embedding model registered with Llama Stack and available via the /models endpoint. - :param input: Input text to embed, encoded as a string or array of strings. 
To embed multiple inputs in a single request, pass an array of strings. - :param encoding_format: (Optional) The format to return the embeddings in. Can be either "float" or "base64". Defaults to "float". - :param dimensions: (Optional) The number of dimensions the resulting output embeddings should have. Only supported in text-embedding-3 and later models. - :param user: (Optional) A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. :returns: An OpenAIEmbeddingsResponse containing the embeddings. """ ... diff --git a/llama_stack/apis/vector_io/vector_io.py b/llama_stack/apis/vector_io/vector_io.py index 238889099..3ced81bdd 100644 --- a/llama_stack/apis/vector_io/vector_io.py +++ b/llama_stack/apis/vector_io/vector_io.py @@ -11,6 +11,7 @@ import uuid from typing import Annotated, Any, Literal, Protocol, runtime_checkable +from fastapi import Body from pydantic import BaseModel, Field from llama_stack.apis.inference import InterleavedContent @@ -466,6 +467,40 @@ class VectorStoreFilesListInBatchResponse(BaseModel): has_more: bool = False +# extra_body can be accessed via .model_extra +@json_schema_type +class OpenAICreateVectorStoreRequestWithExtraBody(BaseModel, extra="allow"): + """Request to create a vector store with extra_body support. + + :param name: (Optional) A name for the vector store + :param file_ids: List of file IDs to include in the vector store + :param expires_after: (Optional) Expiration policy for the vector store + :param chunking_strategy: (Optional) Strategy for splitting files into chunks + :param metadata: Set of key-value pairs that can be attached to the vector store + """ + + name: str | None = None + file_ids: list[str] | None = None + expires_after: dict[str, Any] | None = None + chunking_strategy: dict[str, Any] | None = None + metadata: dict[str, Any] | None = None + + +# extra_body can be accessed via .model_extra +@json_schema_type +class OpenAICreateVectorStoreFileBatchRequestWithExtraBody(BaseModel, extra="allow"): + """Request to create a vector store file batch with extra_body support. + + :param file_ids: A list of File IDs that the vector store should use + :param attributes: (Optional) Key-value attributes to store with the files + :param chunking_strategy: (Optional) The chunking strategy used to chunk the file(s). Defaults to auto + """ + + file_ids: list[str] + attributes: dict[str, Any] | None = None + chunking_strategy: VectorStoreChunkingStrategy | None = None + + class VectorDBStore(Protocol): def get_vector_db(self, vector_db_id: str) -> VectorDB | None: ... @@ -516,25 +551,11 @@ class VectorIO(Protocol): @webmethod(route="/vector_stores", method="POST", level=LLAMA_STACK_API_V1) async def openai_create_vector_store( self, - name: str | None = None, - file_ids: list[str] | None = None, - expires_after: dict[str, Any] | None = None, - chunking_strategy: dict[str, Any] | None = None, - metadata: dict[str, Any] | None = None, - embedding_model: str | None = None, - embedding_dimension: int | None = 384, - provider_id: str | None = None, + params: Annotated[OpenAICreateVectorStoreRequestWithExtraBody, Body(...)], ) -> VectorStoreObject: """Creates a vector store. - :param name: A name for the vector store. - :param file_ids: A list of File IDs that the vector store should use. Useful for tools like `file_search` that can access files. - :param expires_after: The expiration policy for a vector store. - :param chunking_strategy: The chunking strategy used to chunk the file(s). 
If not set, will use the `auto` strategy. - :param metadata: Set of 16 key-value pairs that can be attached to an object. - :param embedding_model: The embedding model to use for this vector store. - :param embedding_dimension: The dimension of the embedding vectors (default: 384). - :param provider_id: The ID of the provider to use for this vector store. + Generate an OpenAI-compatible vector store with the given parameters. :returns: A VectorStoreObject representing the created vector store. """ ... @@ -827,16 +848,12 @@ class VectorIO(Protocol): async def openai_create_vector_store_file_batch( self, vector_store_id: str, - file_ids: list[str], - attributes: dict[str, Any] | None = None, - chunking_strategy: VectorStoreChunkingStrategy | None = None, + params: Annotated[OpenAICreateVectorStoreFileBatchRequestWithExtraBody, Body(...)], ) -> VectorStoreFileBatchObject: """Create a vector store file batch. + Generate an OpenAI-compatible vector store file batch for the given vector store. :param vector_store_id: The ID of the vector store to create the file batch for. - :param file_ids: A list of File IDs that the vector store should use. - :param attributes: (Optional) Key-value attributes to store with the files. - :param chunking_strategy: (Optional) The chunking strategy used to chunk the file(s). Defaults to auto. :returns: A VectorStoreFileBatchObject representing the created file batch. """ ... diff --git a/llama_stack/core/library_client.py b/llama_stack/core/library_client.py index 5d45bd8ad..4d33576ba 100644 --- a/llama_stack/core/library_client.py +++ b/llama_stack/core/library_client.py @@ -513,6 +513,14 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient): # Strip NOT_GIVENs to use the defaults in signature body = {k: v for k, v in body.items() if v is not NOT_GIVEN} + # Check if there's an unwrapped body parameter among multiple parameters + # (e.g., path param + body param like: vector_store_id: str, params: Annotated[Model, Body(...)]) + unwrapped_body_param = None + for param in params_list: + if is_unwrapped_body_param(param.annotation): + unwrapped_body_param = param + break + # Convert parameters to Pydantic models where needed converted_body = {} for param_name, param in sig.parameters.items(): @@ -522,5 +530,11 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient): converted_body[param_name] = value else: converted_body[param_name] = convert_to_pydantic(param.annotation, value) + elif unwrapped_body_param and param.name == unwrapped_body_param.name: + # This is the unwrapped body param - construct it from remaining body keys + base_type = get_args(param.annotation)[0] + # Extract only the keys that aren't already used by other params + remaining_keys = {k: v for k, v in body.items() if k not in converted_body} + converted_body[param.name] = base_type(**remaining_keys) return converted_body diff --git a/llama_stack/core/routers/inference.py b/llama_stack/core/routers/inference.py index e16d08371..b20ad44ca 100644 --- a/llama_stack/core/routers/inference.py +++ b/llama_stack/core/routers/inference.py @@ -40,6 +40,7 @@ from llama_stack.apis.inference import ( OpenAICompletion, OpenAICompletionRequestWithExtraBody, OpenAICompletionWithInputMessages, + OpenAIEmbeddingsRequestWithExtraBody, OpenAIEmbeddingsResponse, OpenAIMessageParam, Order, @@ -279,26 +280,18 @@ class InferenceRouter(Inference): async def openai_embeddings( self, - model: str, - input: str | list[str], - encoding_format: str | None = "float", - dimensions: int | None = None, - user: str 
| None = None, + params: Annotated[OpenAIEmbeddingsRequestWithExtraBody, Body(...)], ) -> OpenAIEmbeddingsResponse: logger.debug( - f"InferenceRouter.openai_embeddings: {model=}, input_type={type(input)}, {encoding_format=}, {dimensions=}", - ) - model_obj = await self._get_model(model, ModelType.embedding) - params = dict( - model=model_obj.identifier, - input=input, - encoding_format=encoding_format, - dimensions=dimensions, - user=user, + f"InferenceRouter.openai_embeddings: model={params.model}, input_type={type(params.input)}, encoding_format={params.encoding_format}, dimensions={params.dimensions}", ) + model_obj = await self._get_model(params.model, ModelType.embedding) + + # Update model to use resolved identifier + params.model = model_obj.identifier provider = await self.routing_table.get_provider_impl(model_obj.identifier) - return await provider.openai_embeddings(**params) + return await provider.openai_embeddings(params) async def list_chat_completions( self, diff --git a/llama_stack/core/routers/vector_io.py b/llama_stack/core/routers/vector_io.py index 0e3f9d8d9..79789ef0a 100644 --- a/llama_stack/core/routers/vector_io.py +++ b/llama_stack/core/routers/vector_io.py @@ -6,12 +6,16 @@ import asyncio import uuid -from typing import Any +from typing import Annotated, Any + +from fastapi import Body from llama_stack.apis.common.content_types import InterleavedContent from llama_stack.apis.models import ModelType from llama_stack.apis.vector_io import ( Chunk, + OpenAICreateVectorStoreFileBatchRequestWithExtraBody, + OpenAICreateVectorStoreRequestWithExtraBody, QueryChunksResponse, SearchRankingOptions, VectorIO, @@ -120,18 +124,19 @@ class VectorIORouter(VectorIO): # OpenAI Vector Stores API endpoints async def openai_create_vector_store( self, - name: str, - file_ids: list[str] | None = None, - expires_after: dict[str, Any] | None = None, - chunking_strategy: dict[str, Any] | None = None, - metadata: dict[str, Any] | None = None, - embedding_model: str | None = None, - embedding_dimension: int | None = None, - provider_id: str | None = None, + params: Annotated[OpenAICreateVectorStoreRequestWithExtraBody, Body(...)], ) -> VectorStoreObject: - logger.debug(f"VectorIORouter.openai_create_vector_store: name={name}, provider_id={provider_id}") + # Extract llama-stack-specific parameters from extra_body + extra = params.model_extra or {} + embedding_model = extra.get("embedding_model") + embedding_dimension = extra.get("embedding_dimension", 384) + provider_id = extra.get("provider_id") + + logger.debug(f"VectorIORouter.openai_create_vector_store: name={params.name}, provider_id={provider_id}") # If no embedding model is provided, use the first available one + # TODO: this branch will soon be deleted so you _must_ provide the embedding_model when + # creating a vector store if embedding_model is None: embedding_model_info = await self._get_first_embedding_model() if embedding_model_info is None: @@ -146,20 +151,19 @@ class VectorIORouter(VectorIO): embedding_dimension=embedding_dimension, provider_id=provider_id, provider_vector_db_id=vector_db_id, - vector_db_name=name, + vector_db_name=params.name, ) provider = await self.routing_table.get_provider_impl(registered_vector_db.identifier) - return await provider.openai_create_vector_store( - name=name, - file_ids=file_ids, - expires_after=expires_after, - chunking_strategy=chunking_strategy, - metadata=metadata, - embedding_model=embedding_model, - embedding_dimension=embedding_dimension, - 
provider_id=registered_vector_db.provider_id, - provider_vector_db_id=registered_vector_db.provider_resource_id, - ) + + # Update model_extra with registered values so provider uses the already-registered vector_db + if params.model_extra is None: + params.model_extra = {} + params.model_extra["provider_vector_db_id"] = registered_vector_db.provider_resource_id + params.model_extra["provider_id"] = registered_vector_db.provider_id + params.model_extra["embedding_model"] = embedding_model + params.model_extra["embedding_dimension"] = embedding_dimension + + return await provider.openai_create_vector_store(params) async def openai_list_vector_stores( self, @@ -219,7 +223,8 @@ class VectorIORouter(VectorIO): vector_store_id: str, ) -> VectorStoreObject: logger.debug(f"VectorIORouter.openai_retrieve_vector_store: {vector_store_id}") - return await self.routing_table.openai_retrieve_vector_store(vector_store_id) + provider = await self.routing_table.get_provider_impl(vector_store_id) + return await provider.openai_retrieve_vector_store(vector_store_id) async def openai_update_vector_store( self, @@ -229,7 +234,8 @@ class VectorIORouter(VectorIO): metadata: dict[str, Any] | None = None, ) -> VectorStoreObject: logger.debug(f"VectorIORouter.openai_update_vector_store: {vector_store_id}") - return await self.routing_table.openai_update_vector_store( + provider = await self.routing_table.get_provider_impl(vector_store_id) + return await provider.openai_update_vector_store( vector_store_id=vector_store_id, name=name, expires_after=expires_after, @@ -241,7 +247,8 @@ class VectorIORouter(VectorIO): vector_store_id: str, ) -> VectorStoreDeleteResponse: logger.debug(f"VectorIORouter.openai_delete_vector_store: {vector_store_id}") - return await self.routing_table.openai_delete_vector_store(vector_store_id) + provider = await self.routing_table.get_provider_impl(vector_store_id) + return await provider.openai_delete_vector_store(vector_store_id) async def openai_search_vector_store( self, @@ -254,7 +261,8 @@ class VectorIORouter(VectorIO): search_mode: str | None = "vector", ) -> VectorStoreSearchResponsePage: logger.debug(f"VectorIORouter.openai_search_vector_store: {vector_store_id}") - return await self.routing_table.openai_search_vector_store( + provider = await self.routing_table.get_provider_impl(vector_store_id) + return await provider.openai_search_vector_store( vector_store_id=vector_store_id, query=query, filters=filters, @@ -272,7 +280,8 @@ class VectorIORouter(VectorIO): chunking_strategy: VectorStoreChunkingStrategy | None = None, ) -> VectorStoreFileObject: logger.debug(f"VectorIORouter.openai_attach_file_to_vector_store: {vector_store_id}, {file_id}") - return await self.routing_table.openai_attach_file_to_vector_store( + provider = await self.routing_table.get_provider_impl(vector_store_id) + return await provider.openai_attach_file_to_vector_store( vector_store_id=vector_store_id, file_id=file_id, attributes=attributes, @@ -289,7 +298,8 @@ class VectorIORouter(VectorIO): filter: VectorStoreFileStatus | None = None, ) -> list[VectorStoreFileObject]: logger.debug(f"VectorIORouter.openai_list_files_in_vector_store: {vector_store_id}") - return await self.routing_table.openai_list_files_in_vector_store( + provider = await self.routing_table.get_provider_impl(vector_store_id) + return await provider.openai_list_files_in_vector_store( vector_store_id=vector_store_id, limit=limit, order=order, @@ -304,7 +314,8 @@ class VectorIORouter(VectorIO): file_id: str, ) -> VectorStoreFileObject: 
logger.debug(f"VectorIORouter.openai_retrieve_vector_store_file: {vector_store_id}, {file_id}") - return await self.routing_table.openai_retrieve_vector_store_file( + provider = await self.routing_table.get_provider_impl(vector_store_id) + return await provider.openai_retrieve_vector_store_file( vector_store_id=vector_store_id, file_id=file_id, ) @@ -315,7 +326,8 @@ class VectorIORouter(VectorIO): file_id: str, ) -> VectorStoreFileContentsResponse: logger.debug(f"VectorIORouter.openai_retrieve_vector_store_file_contents: {vector_store_id}, {file_id}") - return await self.routing_table.openai_retrieve_vector_store_file_contents( + provider = await self.routing_table.get_provider_impl(vector_store_id) + return await provider.openai_retrieve_vector_store_file_contents( vector_store_id=vector_store_id, file_id=file_id, ) @@ -327,7 +339,8 @@ class VectorIORouter(VectorIO): attributes: dict[str, Any], ) -> VectorStoreFileObject: logger.debug(f"VectorIORouter.openai_update_vector_store_file: {vector_store_id}, {file_id}") - return await self.routing_table.openai_update_vector_store_file( + provider = await self.routing_table.get_provider_impl(vector_store_id) + return await provider.openai_update_vector_store_file( vector_store_id=vector_store_id, file_id=file_id, attributes=attributes, @@ -339,7 +352,8 @@ class VectorIORouter(VectorIO): file_id: str, ) -> VectorStoreFileDeleteResponse: logger.debug(f"VectorIORouter.openai_delete_vector_store_file: {vector_store_id}, {file_id}") - return await self.routing_table.openai_delete_vector_store_file( + provider = await self.routing_table.get_provider_impl(vector_store_id) + return await provider.openai_delete_vector_store_file( vector_store_id=vector_store_id, file_id=file_id, ) @@ -370,17 +384,13 @@ class VectorIORouter(VectorIO): async def openai_create_vector_store_file_batch( self, vector_store_id: str, - file_ids: list[str], - attributes: dict[str, Any] | None = None, - chunking_strategy: VectorStoreChunkingStrategy | None = None, + params: Annotated[OpenAICreateVectorStoreFileBatchRequestWithExtraBody, Body(...)], ) -> VectorStoreFileBatchObject: - logger.debug(f"VectorIORouter.openai_create_vector_store_file_batch: {vector_store_id}, {len(file_ids)} files") - return await self.routing_table.openai_create_vector_store_file_batch( - vector_store_id=vector_store_id, - file_ids=file_ids, - attributes=attributes, - chunking_strategy=chunking_strategy, + logger.debug( + f"VectorIORouter.openai_create_vector_store_file_batch: {vector_store_id}, {len(params.file_ids)} files" ) + provider = await self.routing_table.get_provider_impl(vector_store_id) + return await provider.openai_create_vector_store_file_batch(vector_store_id, params) async def openai_retrieve_vector_store_file_batch( self, @@ -388,7 +398,8 @@ class VectorIORouter(VectorIO): vector_store_id: str, ) -> VectorStoreFileBatchObject: logger.debug(f"VectorIORouter.openai_retrieve_vector_store_file_batch: {batch_id}, {vector_store_id}") - return await self.routing_table.openai_retrieve_vector_store_file_batch( + provider = await self.routing_table.get_provider_impl(vector_store_id) + return await provider.openai_retrieve_vector_store_file_batch( batch_id=batch_id, vector_store_id=vector_store_id, ) @@ -404,7 +415,8 @@ class VectorIORouter(VectorIO): order: str | None = "desc", ) -> VectorStoreFilesListInBatchResponse: logger.debug(f"VectorIORouter.openai_list_files_in_vector_store_file_batch: {batch_id}, {vector_store_id}") - return await 
self.routing_table.openai_list_files_in_vector_store_file_batch(
+        provider = await self.routing_table.get_provider_impl(vector_store_id)
+        return await provider.openai_list_files_in_vector_store_file_batch(
             batch_id=batch_id,
             vector_store_id=vector_store_id,
             after=after,
@@ -420,7 +432,8 @@ class VectorIORouter(VectorIO):
         vector_store_id: str,
     ) -> VectorStoreFileBatchObject:
         logger.debug(f"VectorIORouter.openai_cancel_vector_store_file_batch: {batch_id}, {vector_store_id}")
-        return await self.routing_table.openai_cancel_vector_store_file_batch(
+        provider = await self.routing_table.get_provider_impl(vector_store_id)
+        return await provider.openai_cancel_vector_store_file_batch(
             batch_id=batch_id,
             vector_store_id=vector_store_id,
         )
diff --git a/llama_stack/providers/inline/batches/reference/batches.py b/llama_stack/providers/inline/batches/reference/batches.py
index 102537dd7..fa581ae1f 100644
--- a/llama_stack/providers/inline/batches/reference/batches.py
+++ b/llama_stack/providers/inline/batches/reference/batches.py
@@ -25,6 +25,7 @@ from llama_stack.apis.inference import (
     OpenAIChatCompletionRequestWithExtraBody,
     OpenAICompletionRequestWithExtraBody,
     OpenAIDeveloperMessageParam,
+    OpenAIEmbeddingsRequestWithExtraBody,
     OpenAIMessageParam,
     OpenAISystemMessageParam,
     OpenAIToolMessageParam,
@@ -640,7 +641,9 @@ class ReferenceBatchesImpl(Batches):
                 },
             }
         else:  # /v1/embeddings
-            embeddings_response = await self.inference_api.openai_embeddings(**request.body)
+            embeddings_response = await self.inference_api.openai_embeddings(
+                OpenAIEmbeddingsRequestWithExtraBody(**request.body)
+            )
             assert hasattr(embeddings_response, "model_dump_json"), (
                 "Embeddings response must have model_dump_json method"
             )
diff --git a/llama_stack/providers/remote/inference/bedrock/bedrock.py b/llama_stack/providers/remote/inference/bedrock/bedrock.py
index 057ed758b..d266f9e6f 100644
--- a/llama_stack/providers/remote/inference/bedrock/bedrock.py
+++ b/llama_stack/providers/remote/inference/bedrock/bedrock.py
@@ -14,6 +14,7 @@ from llama_stack.apis.inference import (
     Inference,
     OpenAIChatCompletionRequestWithExtraBody,
     OpenAICompletionRequestWithExtraBody,
+    OpenAIEmbeddingsRequestWithExtraBody,
     OpenAIEmbeddingsResponse,
 )
 from llama_stack.apis.inference.inference import (
@@ -124,11 +125,7 @@ class BedrockInferenceAdapter(
 
     async def openai_embeddings(
         self,
-        model: str,
-        input: str | list[str],
-        encoding_format: str | None = "float",
-        dimensions: int | None = None,
-        user: str | None = None,
+        params: OpenAIEmbeddingsRequestWithExtraBody,
     ) -> OpenAIEmbeddingsResponse:
         raise NotImplementedError()
 
diff --git a/llama_stack/providers/remote/inference/cerebras/cerebras.py b/llama_stack/providers/remote/inference/cerebras/cerebras.py
index 0e24af0ee..daf67616b 100644
--- a/llama_stack/providers/remote/inference/cerebras/cerebras.py
+++ b/llama_stack/providers/remote/inference/cerebras/cerebras.py
@@ -6,7 +6,10 @@
 
 from urllib.parse import urljoin
 
-from llama_stack.apis.inference import OpenAIEmbeddingsResponse
+from llama_stack.apis.inference import (
+    OpenAIEmbeddingsRequestWithExtraBody,
+    OpenAIEmbeddingsResponse,
+)
 from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
 
 from .config import CerebrasImplConfig
@@ -20,10 +23,6 @@ class CerebrasInferenceAdapter(OpenAIMixin):
 
     async def openai_embeddings(
         self,
-        model: str,
-        input: str | list[str],
-        encoding_format: str | None = "float",
-        dimensions: int | None = None,
-        user: str | None = None,
+        params: OpenAIEmbeddingsRequestWithExtraBody,
     ) -> OpenAIEmbeddingsResponse:
         raise NotImplementedError()
diff --git a/llama_stack/providers/remote/inference/llama_openai_compat/llama.py b/llama_stack/providers/remote/inference/llama_openai_compat/llama.py
index e5fb3c77f..05d6e8cc8 100644
--- a/llama_stack/providers/remote/inference/llama_openai_compat/llama.py
+++ b/llama_stack/providers/remote/inference/llama_openai_compat/llama.py
@@ -7,6 +7,7 @@
 from llama_stack.apis.inference.inference import (
     OpenAICompletion,
     OpenAICompletionRequestWithExtraBody,
+    OpenAIEmbeddingsRequestWithExtraBody,
     OpenAIEmbeddingsResponse,
 )
 from llama_stack.log import get_logger
@@ -40,10 +41,6 @@ class LlamaCompatInferenceAdapter(OpenAIMixin):
 
     async def openai_embeddings(
         self,
-        model: str,
-        input: str | list[str],
-        encoding_format: str | None = "float",
-        dimensions: int | None = None,
-        user: str | None = None,
+        params: OpenAIEmbeddingsRequestWithExtraBody,
     ) -> OpenAIEmbeddingsResponse:
         raise NotImplementedError()
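
Across all adapters the change is mechanical: the five loose keyword arguments collapse into one typed pydantic request object. A minimal sketch of the new calling convention (the model id, inputs, and the `inference_api` handle are illustrative placeholders, not part of this patch):

    from llama_stack.apis.inference import OpenAIEmbeddingsRequestWithExtraBody

    # Build the typed request once; adapters now receive it as `params`.
    params = OpenAIEmbeddingsRequestWithExtraBody(
        model="my-embedding-model",  # placeholder model id
        input=["first document", "second document"],
        encoding_format="float",
    )
    response = await inference_api.openai_embeddings(params)
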
diff --git a/llama_stack/providers/remote/inference/nvidia/nvidia.py b/llama_stack/providers/remote/inference/nvidia/nvidia.py
index 9d8d1089a..37864b040 100644
--- a/llama_stack/providers/remote/inference/nvidia/nvidia.py
+++ b/llama_stack/providers/remote/inference/nvidia/nvidia.py
@@ -9,6 +9,7 @@ from openai import NOT_GIVEN
 
 from llama_stack.apis.inference import (
     OpenAIEmbeddingData,
+    OpenAIEmbeddingsRequestWithExtraBody,
     OpenAIEmbeddingsResponse,
     OpenAIEmbeddingUsage,
 )
@@ -78,11 +79,7 @@ class NVIDIAInferenceAdapter(OpenAIMixin):
 
     async def openai_embeddings(
         self,
-        model: str,
-        input: str | list[str],
-        encoding_format: str | None = "float",
-        dimensions: int | None = None,
-        user: str | None = None,
+        params: OpenAIEmbeddingsRequestWithExtraBody,
     ) -> OpenAIEmbeddingsResponse:
         """
         OpenAI-compatible embeddings for NVIDIA NIM.
@@ -99,11 +96,11 @@ class NVIDIAInferenceAdapter(OpenAIMixin):
         )
 
         response = await self.client.embeddings.create(
-            model=await self._get_provider_model_id(model),
-            input=input,
-            encoding_format=encoding_format if encoding_format is not None else NOT_GIVEN,
-            dimensions=dimensions if dimensions is not None else NOT_GIVEN,
-            user=user if user is not None else NOT_GIVEN,
+            model=await self._get_provider_model_id(params.model),
+            input=params.input,
+            encoding_format=params.encoding_format if params.encoding_format is not None else NOT_GIVEN,
+            dimensions=params.dimensions if params.dimensions is not None else NOT_GIVEN,
+            user=params.user if params.user is not None else NOT_GIVEN,
             extra_body=extra_body,
         )
 
diff --git a/llama_stack/providers/remote/inference/passthrough/passthrough.py b/llama_stack/providers/remote/inference/passthrough/passthrough.py
index 11306095b..4d4d4f41d 100644
--- a/llama_stack/providers/remote/inference/passthrough/passthrough.py
+++ b/llama_stack/providers/remote/inference/passthrough/passthrough.py
@@ -16,6 +16,7 @@ from llama_stack.apis.inference import (
     OpenAIChatCompletionRequestWithExtraBody,
     OpenAICompletion,
     OpenAICompletionRequestWithExtraBody,
+    OpenAIEmbeddingsRequestWithExtraBody,
     OpenAIEmbeddingsResponse,
 )
 from llama_stack.apis.models import Model
@@ -69,11 +70,7 @@ class PassthroughInferenceAdapter(Inference):
 
     async def openai_embeddings(
         self,
-        model: str,
-        input: str | list[str],
-        encoding_format: str | None = "float",
-        dimensions: int | None = None,
-        user: str | None = None,
+        params: OpenAIEmbeddingsRequestWithExtraBody,
     ) -> OpenAIEmbeddingsResponse:
         raise NotImplementedError()
 
diff --git a/llama_stack/providers/remote/inference/tgi/tgi.py b/llama_stack/providers/remote/inference/tgi/tgi.py
index da3205a13..6ae7b2544 100644
--- a/llama_stack/providers/remote/inference/tgi/tgi.py
+++ b/llama_stack/providers/remote/inference/tgi/tgi.py
@@ -10,7 +10,10 @@ from collections.abc import Iterable
 from huggingface_hub import AsyncInferenceClient, HfApi
 from pydantic import SecretStr
 
-from llama_stack.apis.inference import OpenAIEmbeddingsResponse
+from llama_stack.apis.inference import (
+    OpenAIEmbeddingsRequestWithExtraBody,
+    OpenAIEmbeddingsResponse,
+)
 from llama_stack.log import get_logger
 from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
 
@@ -40,11 +43,7 @@ class _HfAdapter(OpenAIMixin):
 
     async def openai_embeddings(
         self,
-        model: str,
-        input: str | list[str],
-        encoding_format: str | None = "float",
-        dimensions: int | None = None,
-        user: str | None = None,
+        params: OpenAIEmbeddingsRequestWithExtraBody,
     ) -> OpenAIEmbeddingsResponse:
         raise NotImplementedError()
 
diff --git a/llama_stack/providers/remote/inference/together/together.py b/llama_stack/providers/remote/inference/together/together.py
index e29cccf04..e31ebf7c5 100644
--- a/llama_stack/providers/remote/inference/together/together.py
+++ b/llama_stack/providers/remote/inference/together/together.py
@@ -11,6 +11,7 @@ from together import AsyncTogether
 from together.constants import BASE_URL
 
 from llama_stack.apis.inference import (
+    OpenAIEmbeddingsRequestWithExtraBody,
     OpenAIEmbeddingsResponse,
 )
 from llama_stack.apis.inference.inference import OpenAIEmbeddingUsage
@@ -62,11 +63,7 @@ class TogetherInferenceAdapter(OpenAIMixin, NeedsRequestProviderData):
 
     async def openai_embeddings(
         self,
-        model: str,
-        input: str | list[str],
-        encoding_format: str | None = "float",
-        dimensions: int | None = None,
-        user: str | None = None,
+        params: OpenAIEmbeddingsRequestWithExtraBody,
     ) -> OpenAIEmbeddingsResponse:
         """
         Together's OpenAI-compatible embeddings endpoint is not compatible with
@@ -78,25 +75,27 @@ class TogetherInferenceAdapter(OpenAIMixin, NeedsRequestProviderData):
         - does not support dimensions param, returns 400 Unrecognized request arguments supplied: dimensions
         """
         # Together support ticket #13332 -> will not fix
-        if user is not None:
+        if params.user is not None:
             raise ValueError("Together's embeddings endpoint does not support user param.")
 
         # Together support ticket #13333 -> escalated
-        if dimensions is not None:
+        if params.dimensions is not None:
             raise ValueError("Together's embeddings endpoint does not support dimensions param.")
 
         response = await self.client.embeddings.create(
-            model=await self._get_provider_model_id(model),
-            input=input,
-            encoding_format=encoding_format,
+            model=await self._get_provider_model_id(params.model),
+            input=params.input,
+            encoding_format=params.encoding_format,
         )
 
-        response.model = model  # return the user the same model id they provided, avoid exposing the provider model id
+        response.model = (
+            params.model
+        )  # return the user the same model id they provided, avoid exposing the provider model id
 
         # Together support ticket #13330 -> escalated
         # - togethercomputer/m2-bert-80M-32k-retrieval *does not* return usage information
         if not hasattr(response, "usage") or response.usage is None:
             logger.warning(
-                f"Together's embedding endpoint for {model} did not return usage information, substituting -1s."
+                f"Together's embedding endpoint for {params.model} did not return usage information, substituting -1s."
             )
             response.usage = OpenAIEmbeddingUsage(prompt_tokens=-1, total_tokens=-1)
 
diff --git a/llama_stack/providers/utils/inference/embedding_mixin.py b/llama_stack/providers/utils/inference/embedding_mixin.py
index facc59f65..375943442 100644
--- a/llama_stack/providers/utils/inference/embedding_mixin.py
+++ b/llama_stack/providers/utils/inference/embedding_mixin.py
@@ -17,6 +17,7 @@ if TYPE_CHECKING:
 from llama_stack.apis.inference import (
     ModelStore,
     OpenAIEmbeddingData,
+    OpenAIEmbeddingsRequestWithExtraBody,
     OpenAIEmbeddingsResponse,
     OpenAIEmbeddingUsage,
 )
@@ -32,26 +33,22 @@ class SentenceTransformerEmbeddingMixin:
 
     async def openai_embeddings(
         self,
-        model: str,
-        input: str | list[str],
-        encoding_format: str | None = "float",
-        dimensions: int | None = None,
-        user: str | None = None,
+        params: OpenAIEmbeddingsRequestWithExtraBody,
     ) -> OpenAIEmbeddingsResponse:
         # Convert input to list format if it's a single string
-        input_list = [input] if isinstance(input, str) else input
+        input_list = [params.input] if isinstance(params.input, str) else params.input
         if not input_list:
             raise ValueError("Empty list not supported")
 
         # Get the model and generate embeddings
-        model_obj = await self.model_store.get_model(model)
+        model_obj = await self.model_store.get_model(params.model)
         embedding_model = await self._load_sentence_transformer_model(model_obj.provider_resource_id)
         embeddings = await asyncio.to_thread(embedding_model.encode, input_list, show_progress_bar=False)
 
         # Convert embeddings to the requested format
         data = []
         for i, embedding in enumerate(embeddings):
-            if encoding_format == "base64":
+            if params.encoding_format == "base64":
                 # Convert float array to base64 string
                 float_bytes = struct.pack(f"{len(embedding)}f", *embedding)
                 embedding_value = base64.b64encode(float_bytes).decode("ascii")
@@ -70,7 +67,7 @@ class SentenceTransformerEmbeddingMixin:
         usage = OpenAIEmbeddingUsage(prompt_tokens=-1, total_tokens=-1)
         return OpenAIEmbeddingsResponse(
             data=data,
-            model=model,
+            model=params.model,
             usage=usage,
         )
 
diff --git a/llama_stack/providers/utils/inference/litellm_openai_mixin.py b/llama_stack/providers/utils/inference/litellm_openai_mixin.py
index d1be1789a..42b89f897 100644
--- a/llama_stack/providers/utils/inference/litellm_openai_mixin.py
+++ b/llama_stack/providers/utils/inference/litellm_openai_mixin.py
@@ -20,6 +20,7 @@ from llama_stack.apis.inference import (
     OpenAICompletion,
     OpenAICompletionRequestWithExtraBody,
     OpenAIEmbeddingData,
+    OpenAIEmbeddingsRequestWithExtraBody,
     OpenAIEmbeddingsResponse,
     OpenAIEmbeddingUsage,
     ToolChoice,
@@ -189,16 +190,12 @@ class LiteLLMOpenAIMixin(
 
     async def openai_embeddings(
         self,
-        model: str,
-        input: str | list[str],
-        encoding_format: str | None = "float",
-        dimensions: int | None = None,
-        user: str | None = None,
+        params: OpenAIEmbeddingsRequestWithExtraBody,
     ) -> OpenAIEmbeddingsResponse:
-        model_obj = await self.model_store.get_model(model)
+        model_obj = await self.model_store.get_model(params.model)
 
         # Convert input to list if it's a string
-        input_list = [input] if isinstance(input, str) else input
+        input_list = [params.input] if isinstance(params.input, str) else params.input
 
         # Call litellm embedding function
         # litellm.drop_params = True
@@ -207,11 +204,11 @@ class LiteLLMOpenAIMixin(
             input=input_list,
             api_key=self.get_api_key(),
             api_base=self.api_base,
-            dimensions=dimensions,
+            dimensions=params.dimensions,
         )
 
         # Convert response to OpenAI format
-        data = b64_encode_openai_embeddings_response(response.data, encoding_format)
+        data = b64_encode_openai_embeddings_response(response.data, params.encoding_format)
 
         usage = OpenAIEmbeddingUsage(
             prompt_tokens=response["usage"]["prompt_tokens"],
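
The request models retain undeclared fields: anything a caller passes beyond the declared schema lands on `params.model_extra` (this presumes the models are configured with pydantic's `extra="allow"`, which the code below relies on), and the openai_mixin.py hunk that follows forwards exactly that dict as `extra_body`. A sketch, assuming a hypothetical provider-specific `input_type` field:

    params = OpenAIEmbeddingsRequestWithExtraBody(
        model="my-embedding-model",  # placeholder
        input="what is llama stack?",
        input_type="query",  # not a declared field -> retained in model_extra
    )
    assert params.model_extra == {"input_type": "query"}
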
""" + # Prepare request parameters + request_params = { + "model": await self._get_provider_model_id(params.model), + "input": params.input, + "encoding_format": params.encoding_format if params.encoding_format is not None else NOT_GIVEN, + "dimensions": params.dimensions if params.dimensions is not None else NOT_GIVEN, + "user": params.user if params.user is not None else NOT_GIVEN, + } + + # Add extra_body if present + extra_body = params.model_extra + if extra_body: + request_params["extra_body"] = extra_body + # Call OpenAI embeddings API with properly typed parameters - response = await self.client.embeddings.create( - model=await self._get_provider_model_id(model), - input=input, - encoding_format=encoding_format if encoding_format is not None else NOT_GIVEN, - dimensions=dimensions if dimensions is not None else NOT_GIVEN, - user=user if user is not None else NOT_GIVEN, - ) + response = await self.client.embeddings.create(**request_params) data = [] for i, embedding_data in enumerate(response.data): @@ -350,7 +355,7 @@ class OpenAIMixin(NeedsRequestProviderData, ABC, BaseModel): return OpenAIEmbeddingsResponse( data=data, - model=model, + model=params.model, usage=usage, ) diff --git a/llama_stack/providers/utils/memory/openai_vector_store_mixin.py b/llama_stack/providers/utils/memory/openai_vector_store_mixin.py index ddfef9ba2..70bcbba32 100644 --- a/llama_stack/providers/utils/memory/openai_vector_store_mixin.py +++ b/llama_stack/providers/utils/memory/openai_vector_store_mixin.py @@ -10,8 +10,9 @@ import mimetypes import time import uuid from abc import ABC, abstractmethod -from typing import Any +from typing import Annotated, Any +from fastapi import Body from pydantic import TypeAdapter from llama_stack.apis.common.errors import VectorStoreNotFoundError @@ -19,6 +20,8 @@ from llama_stack.apis.files import Files, OpenAIFileObject from llama_stack.apis.vector_dbs import VectorDB from llama_stack.apis.vector_io import ( Chunk, + OpenAICreateVectorStoreFileBatchRequestWithExtraBody, + OpenAICreateVectorStoreRequestWithExtraBody, QueryChunksResponse, SearchRankingOptions, VectorStoreChunkingStrategy, @@ -340,18 +343,18 @@ class OpenAIVectorStoreMixin(ABC): async def openai_create_vector_store( self, - name: str | None = None, - file_ids: list[str] | None = None, - expires_after: dict[str, Any] | None = None, - chunking_strategy: dict[str, Any] | None = None, - metadata: dict[str, Any] | None = None, - embedding_model: str | None = None, - embedding_dimension: int | None = 384, - provider_id: str | None = None, - provider_vector_db_id: str | None = None, + params: Annotated[OpenAICreateVectorStoreRequestWithExtraBody, Body(...)], ) -> VectorStoreObject: """Creates a vector store.""" created_at = int(time.time()) + + # Extract llama-stack-specific parameters from extra_body + extra = params.model_extra or {} + provider_vector_db_id = extra.get("provider_vector_db_id") + embedding_model = extra.get("embedding_model") + embedding_dimension = extra.get("embedding_dimension", 384) + provider_id = extra.get("provider_id") + # Derive the canonical vector_db_id (allow override, else generate) vector_db_id = provider_vector_db_id or generate_object_id("vector_store", lambda: f"vs_{uuid.uuid4()}") @@ -372,7 +375,7 @@ class OpenAIVectorStoreMixin(ABC): embedding_model=embedding_model, provider_id=provider_id, provider_resource_id=vector_db_id, - vector_db_name=name, + vector_db_name=params.name, ) await self.register_vector_db(vector_db) @@ -391,19 +394,19 @@ class OpenAIVectorStoreMixin(ABC): 
"id": vector_db_id, "object": "vector_store", "created_at": created_at, - "name": name, + "name": params.name, "usage_bytes": 0, "file_counts": file_counts.model_dump(), "status": status, - "expires_after": expires_after, + "expires_after": params.expires_after, "expires_at": None, "last_active_at": created_at, "file_ids": [], - "chunking_strategy": chunking_strategy, + "chunking_strategy": params.chunking_strategy, } # Add provider information to metadata if provided - metadata = metadata or {} + metadata = params.metadata or {} if provider_id: metadata["provider_id"] = provider_id if provider_vector_db_id: @@ -417,7 +420,7 @@ class OpenAIVectorStoreMixin(ABC): self.openai_vector_stores[vector_db_id] = store_info # Now that our vector store is created, attach any files that were provided - file_ids = file_ids or [] + file_ids = params.file_ids or [] tasks = [self.openai_attach_file_to_vector_store(vector_db_id, file_id) for file_id in file_ids] await asyncio.gather(*tasks) @@ -976,15 +979,13 @@ class OpenAIVectorStoreMixin(ABC): async def openai_create_vector_store_file_batch( self, vector_store_id: str, - file_ids: list[str], - attributes: dict[str, Any] | None = None, - chunking_strategy: VectorStoreChunkingStrategy | None = None, + params: Annotated[OpenAICreateVectorStoreFileBatchRequestWithExtraBody, Body(...)], ) -> VectorStoreFileBatchObject: """Create a vector store file batch.""" if vector_store_id not in self.openai_vector_stores: raise VectorStoreNotFoundError(vector_store_id) - chunking_strategy = chunking_strategy or VectorStoreChunkingStrategyAuto() + chunking_strategy = params.chunking_strategy or VectorStoreChunkingStrategyAuto() created_at = int(time.time()) batch_id = generate_object_id("vector_store_file_batch", lambda: f"batch_{uuid.uuid4()}") @@ -996,8 +997,8 @@ class OpenAIVectorStoreMixin(ABC): completed=0, cancelled=0, failed=0, - in_progress=len(file_ids), - total=len(file_ids), + in_progress=len(params.file_ids), + total=len(params.file_ids), ) # Create batch object immediately with in_progress status @@ -1011,8 +1012,8 @@ class OpenAIVectorStoreMixin(ABC): batch_info = { **batch_object.model_dump(), - "file_ids": file_ids, - "attributes": attributes, + "file_ids": params.file_ids, + "attributes": params.attributes, "chunking_strategy": chunking_strategy.model_dump(), "expires_at": expires_at, } diff --git a/llama_stack/providers/utils/memory/vector_store.py b/llama_stack/providers/utils/memory/vector_store.py index c0534a875..0375ecaaa 100644 --- a/llama_stack/providers/utils/memory/vector_store.py +++ b/llama_stack/providers/utils/memory/vector_store.py @@ -21,6 +21,7 @@ from llama_stack.apis.common.content_types import ( URL, InterleavedContent, ) +from llama_stack.apis.inference import OpenAIEmbeddingsRequestWithExtraBody from llama_stack.apis.tools import RAGDocument from llama_stack.apis.vector_dbs import VectorDB from llama_stack.apis.vector_io import Chunk, ChunkMetadata, QueryChunksResponse @@ -274,10 +275,11 @@ class VectorDBWithIndex: _validate_embedding(c.embedding, i, self.vector_db.embedding_dimension) if chunks_to_embed: - resp = await self.inference_api.openai_embeddings( - self.vector_db.embedding_model, - [c.content for c in chunks_to_embed], + params = OpenAIEmbeddingsRequestWithExtraBody( + model=self.vector_db.embedding_model, + input=[c.content for c in chunks_to_embed], ) + resp = await self.inference_api.openai_embeddings(params) for c, data in zip(chunks_to_embed, resp.data, strict=False): c.embedding = data.embedding @@ -316,7 +318,11 @@ 
class VectorDBWithIndex: if mode == "keyword": return await self.index.query_keyword(query_string, k, score_threshold) - embeddings_response = await self.inference_api.openai_embeddings(self.vector_db.embedding_model, [query_string]) + params = OpenAIEmbeddingsRequestWithExtraBody( + model=self.vector_db.embedding_model, + input=[query_string], + ) + embeddings_response = await self.inference_api.openai_embeddings(params) query_vector = np.array(embeddings_response.data[0].embedding, dtype=np.float32) if mode == "hybrid": return await self.index.query_hybrid( diff --git a/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py b/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py index ed0934224..28b07beb8 100644 --- a/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py +++ b/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py @@ -15,6 +15,7 @@ from llama_stack.apis.common.errors import VectorStoreNotFoundError from llama_stack.apis.vector_dbs import VectorDB from llama_stack.apis.vector_io import ( Chunk, + OpenAICreateVectorStoreFileBatchRequestWithExtraBody, QueryChunksResponse, VectorStoreChunkingStrategyAuto, VectorStoreFileObject, @@ -326,8 +327,7 @@ async def test_create_vector_store_file_batch(vector_io_adapter): vector_io_adapter._process_file_batch_async = AsyncMock() batch = await vector_io_adapter.openai_create_vector_store_file_batch( - vector_store_id=store_id, - file_ids=file_ids, + vector_store_id=store_id, params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(file_ids=file_ids) ) assert batch.vector_store_id == store_id @@ -354,8 +354,7 @@ async def test_retrieve_vector_store_file_batch(vector_io_adapter): # Create batch first created_batch = await vector_io_adapter.openai_create_vector_store_file_batch( - vector_store_id=store_id, - file_ids=file_ids, + vector_store_id=store_id, params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(file_ids=file_ids) ) # Retrieve batch @@ -388,8 +387,7 @@ async def test_cancel_vector_store_file_batch(vector_io_adapter): # Create batch batch = await vector_io_adapter.openai_create_vector_store_file_batch( - vector_store_id=store_id, - file_ids=file_ids, + vector_store_id=store_id, params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(file_ids=file_ids) ) # Cancel batch @@ -434,8 +432,7 @@ async def test_list_files_in_vector_store_file_batch(vector_io_adapter): # Create batch batch = await vector_io_adapter.openai_create_vector_store_file_batch( - vector_store_id=store_id, - file_ids=file_ids, + vector_store_id=store_id, params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(file_ids=file_ids) ) # List files @@ -455,7 +452,7 @@ async def test_file_batch_validation_errors(vector_io_adapter): with pytest.raises(VectorStoreNotFoundError): await vector_io_adapter.openai_create_vector_store_file_batch( vector_store_id="nonexistent", - file_ids=["file_1"], + params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(file_ids=["file_1"]), ) # Setup store for remaining tests @@ -472,8 +469,7 @@ async def test_file_batch_validation_errors(vector_io_adapter): # Test wrong vector store for batch vector_io_adapter.openai_attach_file_to_vector_store = AsyncMock() batch = await vector_io_adapter.openai_create_vector_store_file_batch( - vector_store_id=store_id, - file_ids=["file_1"], + vector_store_id=store_id, params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(file_ids=["file_1"]) ) # Create wrong_store so it exists but the batch doesn't belong to it @@ 
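
File batches keep the store id as an explicit argument while the body moves into the typed request, as the test updates below exercise. A sketch (the `vector_io` handle and all ids are placeholders):

    from llama_stack.apis.vector_io import OpenAICreateVectorStoreFileBatchRequestWithExtraBody

    batch = await vector_io.openai_create_vector_store_file_batch(
        vector_store_id="vs_123",  # placeholder store id
        params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(file_ids=["file_1", "file_2"]),
    )
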
diff --git a/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py b/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py
index ed0934224..28b07beb8 100644
--- a/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py
+++ b/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py
@@ -15,6 +15,7 @@ from llama_stack.apis.common.errors import VectorStoreNotFoundError
 from llama_stack.apis.vector_dbs import VectorDB
 from llama_stack.apis.vector_io import (
     Chunk,
+    OpenAICreateVectorStoreFileBatchRequestWithExtraBody,
     QueryChunksResponse,
     VectorStoreChunkingStrategyAuto,
     VectorStoreFileObject,
@@ -326,8 +327,7 @@ async def test_create_vector_store_file_batch(vector_io_adapter):
     vector_io_adapter._process_file_batch_async = AsyncMock()
 
     batch = await vector_io_adapter.openai_create_vector_store_file_batch(
-        vector_store_id=store_id,
-        file_ids=file_ids,
+        vector_store_id=store_id, params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(file_ids=file_ids)
     )
 
     assert batch.vector_store_id == store_id
@@ -354,8 +354,7 @@ async def test_retrieve_vector_store_file_batch(vector_io_adapter):
 
     # Create batch first
     created_batch = await vector_io_adapter.openai_create_vector_store_file_batch(
-        vector_store_id=store_id,
-        file_ids=file_ids,
+        vector_store_id=store_id, params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(file_ids=file_ids)
     )
 
     # Retrieve batch
@@ -388,8 +387,7 @@ async def test_cancel_vector_store_file_batch(vector_io_adapter):
 
     # Create batch
     batch = await vector_io_adapter.openai_create_vector_store_file_batch(
-        vector_store_id=store_id,
-        file_ids=file_ids,
+        vector_store_id=store_id, params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(file_ids=file_ids)
     )
 
     # Cancel batch
@@ -434,8 +432,7 @@ async def test_list_files_in_vector_store_file_batch(vector_io_adapter):
 
     # Create batch
     batch = await vector_io_adapter.openai_create_vector_store_file_batch(
-        vector_store_id=store_id,
-        file_ids=file_ids,
+        vector_store_id=store_id, params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(file_ids=file_ids)
     )
 
     # List files
@@ -455,7 +452,7 @@ async def test_file_batch_validation_errors(vector_io_adapter):
     with pytest.raises(VectorStoreNotFoundError):
         await vector_io_adapter.openai_create_vector_store_file_batch(
             vector_store_id="nonexistent",
-            file_ids=["file_1"],
+            params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(file_ids=["file_1"]),
         )
 
     # Setup store for remaining tests
@@ -472,8 +469,7 @@ async def test_file_batch_validation_errors(vector_io_adapter):
     # Test wrong vector store for batch
     vector_io_adapter.openai_attach_file_to_vector_store = AsyncMock()
     batch = await vector_io_adapter.openai_create_vector_store_file_batch(
-        vector_store_id=store_id,
-        file_ids=["file_1"],
+        vector_store_id=store_id, params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(file_ids=["file_1"])
     )
 
     # Create wrong_store so it exists but the batch doesn't belong to it
@@ -520,8 +516,7 @@ async def test_file_batch_pagination(vector_io_adapter):
 
     # Create batch
     batch = await vector_io_adapter.openai_create_vector_store_file_batch(
-        vector_store_id=store_id,
-        file_ids=file_ids,
+        vector_store_id=store_id, params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(file_ids=file_ids)
     )
 
     # Test pagination with limit
@@ -593,8 +588,7 @@ async def test_file_batch_status_filtering(vector_io_adapter):
 
     # Create batch
     batch = await vector_io_adapter.openai_create_vector_store_file_batch(
-        vector_store_id=store_id,
-        file_ids=file_ids,
+        vector_store_id=store_id, params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(file_ids=file_ids)
     )
 
     # Test filtering by completed status
@@ -636,8 +630,7 @@ async def test_cancel_completed_batch_fails(vector_io_adapter):
 
     # Create batch
     batch = await vector_io_adapter.openai_create_vector_store_file_batch(
-        vector_store_id=store_id,
-        file_ids=file_ids,
+        vector_store_id=store_id, params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(file_ids=file_ids)
    )
 
     # Manually update status to completed
@@ -671,8 +664,7 @@ async def test_file_batch_persistence_across_restarts(vector_io_adapter):
 
     # Create batch
     batch = await vector_io_adapter.openai_create_vector_store_file_batch(
-        vector_store_id=store_id,
-        file_ids=file_ids,
+        vector_store_id=store_id, params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(file_ids=file_ids)
     )
     batch_id = batch.id
 
@@ -727,8 +719,7 @@ async def test_cancelled_batch_persists_in_storage(vector_io_adapter):
 
     # Create batch
     batch = await vector_io_adapter.openai_create_vector_store_file_batch(
-        vector_store_id=store_id,
-        file_ids=file_ids,
+        vector_store_id=store_id, params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(file_ids=file_ids)
     )
     batch_id = batch.id
 
@@ -775,10 +766,10 @@ async def test_only_in_progress_batches_resumed(vector_io_adapter):
 
     # Create multiple batches
     batch1 = await vector_io_adapter.openai_create_vector_store_file_batch(
-        vector_store_id=store_id, file_ids=["file_1"]
+        vector_store_id=store_id, params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(file_ids=["file_1"])
     )
     batch2 = await vector_io_adapter.openai_create_vector_store_file_batch(
-        vector_store_id=store_id, file_ids=["file_2"]
+        vector_store_id=store_id, params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(file_ids=["file_2"])
     )
 
     # Complete one batch (should persist with completed status)
@@ -791,7 +782,7 @@ async def test_only_in_progress_batches_resumed(vector_io_adapter):
 
     # Create a third batch that stays in progress
     batch3 = await vector_io_adapter.openai_create_vector_store_file_batch(
-        vector_store_id=store_id, file_ids=["file_3"]
+        vector_store_id=store_id, params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(file_ids=["file_3"])
     )
 
     # Simulate restart - clear memory and reload from persistence
@@ -952,8 +943,7 @@ async def test_max_concurrent_files_per_batch(vector_io_adapter):
     file_ids = [f"file_{i}" for i in range(8)]  # 8 files, but limit should be 5
 
     batch = await vector_io_adapter.openai_create_vector_store_file_batch(
-        vector_store_id=store_id,
-        file_ids=file_ids,
+        vector_store_id=store_id, params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(file_ids=file_ids)
     )
 
     # Give time for the semaphore logic to start processing files
diff --git a/tests/unit/rag/test_vector_store.py b/tests/unit/rag/test_vector_store.py
index 8c017a551..1e40c98e8 100644
--- a/tests/unit/rag/test_vector_store.py
+++ b/tests/unit/rag/test_vector_store.py
@@ -13,7 +13,10 @@ from unittest.mock import AsyncMock, MagicMock
 
 import numpy as np
 import pytest
 
-from llama_stack.apis.inference.inference import OpenAIEmbeddingData
+from llama_stack.apis.inference.inference import (
+    OpenAIEmbeddingData,
+    OpenAIEmbeddingsRequestWithExtraBody,
+)
 from llama_stack.apis.tools import RAGDocument
 from llama_stack.apis.vector_io import Chunk
 from llama_stack.providers.utils.memory.vector_store import (
@@ -226,9 +229,14 @@ class TestVectorDBWithIndex:
 
         await vector_db_with_index.insert_chunks(chunks)
 
-        mock_inference_api.openai_embeddings.assert_called_once_with(
-            "test-model without embeddings", ["Test 1", "Test 2"]
-        )
+        # Verify openai_embeddings was called with correct params
+        mock_inference_api.openai_embeddings.assert_called_once()
+        call_args = mock_inference_api.openai_embeddings.call_args[0]
+        assert len(call_args) == 1
+        params = call_args[0]
+        assert isinstance(params, OpenAIEmbeddingsRequestWithExtraBody)
+        assert params.model == "test-model without embeddings"
+        assert params.input == ["Test 1", "Test 2"]
         mock_index.add_chunks.assert_called_once()
         args = mock_index.add_chunks.call_args[0]
         assert args[0] == chunks
@@ -321,9 +329,14 @@ class TestVectorDBWithIndex:
 
         await vector_db_with_index.insert_chunks(chunks)
 
-        mock_inference_api.openai_embeddings.assert_called_once_with(
-            "test-model with partial embeddings", ["Test 1", "Test 3"]
-        )
+        # Verify openai_embeddings was called with correct params
+        mock_inference_api.openai_embeddings.assert_called_once()
+        call_args = mock_inference_api.openai_embeddings.call_args[0]
+        assert len(call_args) == 1
+        params = call_args[0]
+        assert isinstance(params, OpenAIEmbeddingsRequestWithExtraBody)
+        assert params.model == "test-model with partial embeddings"
+        assert params.input == ["Test 1", "Test 3"]
         mock_index.add_chunks.assert_called_once()
         args = mock_index.add_chunks.call_args[0]
         assert len(args[0]) == 3
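
From an OpenAI-compatible client, the same llama-stack-specific parameters travel in `extra_body` (which the official SDK's create methods accept), and the updated endpoints unpack them server-side as shown above. A sketch against a locally running stack, where the URL, key, and embedding model id are placeholders:

    from openai import OpenAI

    client = OpenAI(base_url="http://localhost:8321/v1", api_key="none")  # placeholder endpoint
    store = client.vector_stores.create(
        name="my-docs",
        extra_body={"embedding_model": "all-MiniLM-L6-v2", "embedding_dimension": 384},
    )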