From 74e2976c1e67c245b74f40f5f5e3e125c7faebc8 Mon Sep 17 00:00:00 2001 From: Ashwin Bharambe Date: Sat, 11 Oct 2025 15:27:47 -0700 Subject: [PATCH] feat(api)!: support passing extra_body to embeddings and vector_stores APIs Applies the same pattern from #3777 to embeddings and vector_stores.create() endpoints. Breaking change: Method signatures now accept a single params object with Pydantic extra="allow" instead of individual parameters. Provider-specific params can be passed via extra_body and accessed through params.model_extra. Updated APIs: openai_embeddings(), openai_create_vector_store(), openai_create_vector_store_file_batch() --- docs/static/deprecated-llama-stack-spec.html | 49 ++++++++------ docs/static/deprecated-llama-stack-spec.yaml | 63 ++++++++++------- docs/static/llama-stack-spec.html | 49 ++++++++------ docs/static/llama-stack-spec.yaml | 63 ++++++++++------- docs/static/stainless-llama-stack-spec.html | 49 ++++++++------ docs/static/stainless-llama-stack-spec.yaml | 63 ++++++++++------- llama_stack/apis/inference/inference.py | 31 ++++++--- llama_stack/apis/vector_io/vector_io.py | 67 +++++++++++++------ llama_stack/core/routers/inference.py | 23 +++---- .../remote/inference/bedrock/bedrock.py | 7 +- .../remote/inference/cerebras/cerebras.py | 11 ++- .../inference/llama_openai_compat/llama.py | 7 +- .../remote/inference/nvidia/nvidia.py | 17 ++--- .../inference/passthrough/passthrough.py | 7 +- .../providers/remote/inference/tgi/tgi.py | 11 ++- .../remote/inference/together/together.py | 23 +++---- .../utils/inference/embedding_mixin.py | 15 ++--- .../utils/inference/litellm_openai_mixin.py | 15 ++--- .../providers/utils/inference/openai_mixin.py | 31 +++++---- .../utils/memory/openai_vector_store_mixin.py | 60 ++++++++--------- 20 files changed, 364 insertions(+), 297 deletions(-) diff --git a/docs/static/deprecated-llama-stack-spec.html b/docs/static/deprecated-llama-stack-spec.html index 8c4c80014..a3638749d 100644 --- a/docs/static/deprecated-llama-stack-spec.html +++ b/docs/static/deprecated-llama-stack-spec.html @@ -1662,7 +1662,7 @@ "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/OpenaiEmbeddingsRequest" + "$ref": "#/components/schemas/OpenAIEmbeddingsRequestWithExtraBody" } } }, @@ -2436,13 +2436,13 @@ "VectorIO" ], "summary": "Creates a vector store.", - "description": "Creates a vector store.", + "description": "Creates a vector store.\nGenerate an OpenAI-compatible vector store with the given parameters.", "parameters": [], "requestBody": { "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/OpenaiCreateVectorStoreRequest" + "$ref": "#/components/schemas/OpenAICreateVectorStoreRequestWithExtraBody" } } }, @@ -2622,7 +2622,7 @@ "VectorIO" ], "summary": "Create a vector store file batch.", - "description": "Create a vector store file batch.", + "description": "Create a vector store file batch.\nGenerate an OpenAI-compatible vector store file batch for the given vector store.", "parameters": [ { "name": "vector_store_id", @@ -2638,7 +2638,7 @@ "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/OpenaiCreateVectorStoreFileBatchRequest" + "$ref": "#/components/schemas/OpenAICreateVectorStoreFileBatchRequestWithExtraBody" } } }, @@ -8174,7 +8174,7 @@ "title": "OpenAICompletionChoice", "description": "A choice from an OpenAI-compatible completion response." }, - "OpenaiEmbeddingsRequest": { + "OpenAIEmbeddingsRequestWithExtraBody": { "type": "object", "properties": { "model": { @@ -8197,6 +8197,7 @@ }, "encoding_format": { "type": "string", + "default": "float", "description": "(Optional) The format to return the embeddings in. Can be either \"float\" or \"base64\". Defaults to \"float\"." }, "dimensions": { @@ -8213,7 +8214,8 @@ "model", "input" ], - "title": "OpenaiEmbeddingsRequest" + "title": "OpenAIEmbeddingsRequestWithExtraBody", + "description": "Request parameters for OpenAI-compatible embeddings endpoint." }, "OpenAIEmbeddingData": { "type": "object", @@ -12061,19 +12063,19 @@ "title": "VectorStoreObject", "description": "OpenAI Vector Store object." }, - "OpenaiCreateVectorStoreRequest": { + "OpenAICreateVectorStoreRequestWithExtraBody": { "type": "object", "properties": { "name": { "type": "string", - "description": "A name for the vector store." + "description": "(Optional) A name for the vector store" }, "file_ids": { "type": "array", "items": { "type": "string" }, - "description": "A list of File IDs that the vector store should use. Useful for tools like `file_search` that can access files." + "description": "List of file IDs to include in the vector store" }, "expires_after": { "type": "object", @@ -12099,7 +12101,7 @@ } ] }, - "description": "The expiration policy for a vector store." + "description": "(Optional) Expiration policy for the vector store" }, "chunking_strategy": { "type": "object", @@ -12125,7 +12127,7 @@ } ] }, - "description": "The chunking strategy used to chunk the file(s). If not set, will use the `auto` strategy." + "description": "(Optional) Strategy for splitting files into chunks" }, "metadata": { "type": "object", @@ -12151,23 +12153,25 @@ } ] }, - "description": "Set of 16 key-value pairs that can be attached to an object." + "description": "Set of key-value pairs that can be attached to the vector store" }, "embedding_model": { "type": "string", - "description": "The embedding model to use for this vector store." + "description": "(Optional) The embedding model to use for this vector store" }, "embedding_dimension": { "type": "integer", - "description": "The dimension of the embedding vectors (default: 384)." + "default": 384, + "description": "(Optional) The dimension of the embedding vectors (default: 384)" }, "provider_id": { "type": "string", - "description": "The ID of the provider to use for this vector store." + "description": "(Optional) The ID of the provider to use for this vector store" } }, "additionalProperties": false, - "title": "OpenaiCreateVectorStoreRequest" + "title": "OpenAICreateVectorStoreRequestWithExtraBody", + "description": "Request to create a vector store with extra_body support." }, "OpenaiUpdateVectorStoreRequest": { "type": "object", @@ -12337,7 +12341,7 @@ "title": "VectorStoreChunkingStrategyStaticConfig", "description": "Configuration for static chunking strategy." }, - "OpenaiCreateVectorStoreFileBatchRequest": { + "OpenAICreateVectorStoreFileBatchRequestWithExtraBody": { "type": "object", "properties": { "file_ids": { @@ -12345,7 +12349,7 @@ "items": { "type": "string" }, - "description": "A list of File IDs that the vector store should use." + "description": "A list of File IDs that the vector store should use" }, "attributes": { "type": "object", @@ -12371,18 +12375,19 @@ } ] }, - "description": "(Optional) Key-value attributes to store with the files." + "description": "(Optional) Key-value attributes to store with the files" }, "chunking_strategy": { "$ref": "#/components/schemas/VectorStoreChunkingStrategy", - "description": "(Optional) The chunking strategy used to chunk the file(s). Defaults to auto." + "description": "(Optional) The chunking strategy used to chunk the file(s). Defaults to auto" } }, "additionalProperties": false, "required": [ "file_ids" ], - "title": "OpenaiCreateVectorStoreFileBatchRequest" + "title": "OpenAICreateVectorStoreFileBatchRequestWithExtraBody", + "description": "Request to create a vector store file batch with extra_body support." }, "VectorStoreFileBatchObject": { "type": "object", diff --git a/docs/static/deprecated-llama-stack-spec.yaml b/docs/static/deprecated-llama-stack-spec.yaml index e8cc035da..3cbcbc82b 100644 --- a/docs/static/deprecated-llama-stack-spec.yaml +++ b/docs/static/deprecated-llama-stack-spec.yaml @@ -1203,7 +1203,7 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/OpenaiEmbeddingsRequest' + $ref: '#/components/schemas/OpenAIEmbeddingsRequestWithExtraBody' required: true deprecated: true /v1/openai/v1/files: @@ -1792,13 +1792,16 @@ paths: tags: - VectorIO summary: Creates a vector store. - description: Creates a vector store. + description: >- + Creates a vector store. + + Generate an OpenAI-compatible vector store with the given parameters. parameters: [] requestBody: content: application/json: schema: - $ref: '#/components/schemas/OpenaiCreateVectorStoreRequest' + $ref: '#/components/schemas/OpenAICreateVectorStoreRequestWithExtraBody' required: true deprecated: true /v1/openai/v1/vector_stores/{vector_store_id}: @@ -1924,7 +1927,11 @@ paths: tags: - VectorIO summary: Create a vector store file batch. - description: Create a vector store file batch. + description: >- + Create a vector store file batch. + + Generate an OpenAI-compatible vector store file batch for the given vector + store. parameters: - name: vector_store_id in: path @@ -1937,7 +1944,7 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/OpenaiCreateVectorStoreFileBatchRequest' + $ref: '#/components/schemas/OpenAICreateVectorStoreFileBatchRequestWithExtraBody' required: true deprecated: true /v1/openai/v1/vector_stores/{vector_store_id}/file_batches/{batch_id}: @@ -6035,7 +6042,7 @@ components: title: OpenAICompletionChoice description: >- A choice from an OpenAI-compatible completion response. - OpenaiEmbeddingsRequest: + OpenAIEmbeddingsRequestWithExtraBody: type: object properties: model: @@ -6054,6 +6061,7 @@ components: multiple inputs in a single request, pass an array of strings. encoding_format: type: string + default: float description: >- (Optional) The format to return the embeddings in. Can be either "float" or "base64". Defaults to "float". @@ -6071,7 +6079,9 @@ components: required: - model - input - title: OpenaiEmbeddingsRequest + title: OpenAIEmbeddingsRequestWithExtraBody + description: >- + Request parameters for OpenAI-compatible embeddings endpoint. OpenAIEmbeddingData: type: object properties: @@ -9147,19 +9157,18 @@ components: - metadata title: VectorStoreObject description: OpenAI Vector Store object. - OpenaiCreateVectorStoreRequest: + "OpenAICreateVectorStoreRequestWithExtraBody": type: object properties: name: type: string - description: A name for the vector store. + description: (Optional) A name for the vector store file_ids: type: array items: type: string description: >- - A list of File IDs that the vector store should use. Useful for tools - like `file_search` that can access files. + List of file IDs to include in the vector store expires_after: type: object additionalProperties: @@ -9171,7 +9180,7 @@ components: - type: array - type: object description: >- - The expiration policy for a vector store. + (Optional) Expiration policy for the vector store chunking_strategy: type: object additionalProperties: @@ -9183,8 +9192,7 @@ components: - type: array - type: object description: >- - The chunking strategy used to chunk the file(s). If not set, will use - the `auto` strategy. + (Optional) Strategy for splitting files into chunks metadata: type: object additionalProperties: @@ -9196,21 +9204,25 @@ components: - type: array - type: object description: >- - Set of 16 key-value pairs that can be attached to an object. + Set of key-value pairs that can be attached to the vector store embedding_model: type: string description: >- - The embedding model to use for this vector store. + (Optional) The embedding model to use for this vector store embedding_dimension: type: integer + default: 384 description: >- - The dimension of the embedding vectors (default: 384). + (Optional) The dimension of the embedding vectors (default: 384) provider_id: type: string description: >- - The ID of the provider to use for this vector store. + (Optional) The ID of the provider to use for this vector store additionalProperties: false - title: OpenaiCreateVectorStoreRequest + title: >- + OpenAICreateVectorStoreRequestWithExtraBody + description: >- + Request to create a vector store with extra_body support. OpenaiUpdateVectorStoreRequest: type: object properties: @@ -9331,7 +9343,7 @@ components: title: VectorStoreChunkingStrategyStaticConfig description: >- Configuration for static chunking strategy. - OpenaiCreateVectorStoreFileBatchRequest: + "OpenAICreateVectorStoreFileBatchRequestWithExtraBody": type: object properties: file_ids: @@ -9339,7 +9351,7 @@ components: items: type: string description: >- - A list of File IDs that the vector store should use. + A list of File IDs that the vector store should use attributes: type: object additionalProperties: @@ -9351,16 +9363,19 @@ components: - type: array - type: object description: >- - (Optional) Key-value attributes to store with the files. + (Optional) Key-value attributes to store with the files chunking_strategy: $ref: '#/components/schemas/VectorStoreChunkingStrategy' description: >- (Optional) The chunking strategy used to chunk the file(s). Defaults to - auto. + auto additionalProperties: false required: - file_ids - title: OpenaiCreateVectorStoreFileBatchRequest + title: >- + OpenAICreateVectorStoreFileBatchRequestWithExtraBody + description: >- + Request to create a vector store file batch with extra_body support. VectorStoreFileBatchObject: type: object properties: diff --git a/docs/static/llama-stack-spec.html b/docs/static/llama-stack-spec.html index 38ec9dd1c..c1aa41abc 100644 --- a/docs/static/llama-stack-spec.html +++ b/docs/static/llama-stack-spec.html @@ -765,7 +765,7 @@ "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/OpenaiEmbeddingsRequest" + "$ref": "#/components/schemas/OpenAIEmbeddingsRequestWithExtraBody" } } }, @@ -3170,13 +3170,13 @@ "VectorIO" ], "summary": "Creates a vector store.", - "description": "Creates a vector store.", + "description": "Creates a vector store.\nGenerate an OpenAI-compatible vector store with the given parameters.", "parameters": [], "requestBody": { "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/OpenaiCreateVectorStoreRequest" + "$ref": "#/components/schemas/OpenAICreateVectorStoreRequestWithExtraBody" } } }, @@ -3356,7 +3356,7 @@ "VectorIO" ], "summary": "Create a vector store file batch.", - "description": "Create a vector store file batch.", + "description": "Create a vector store file batch.\nGenerate an OpenAI-compatible vector store file batch for the given vector store.", "parameters": [ { "name": "vector_store_id", @@ -3372,7 +3372,7 @@ "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/OpenaiCreateVectorStoreFileBatchRequest" + "$ref": "#/components/schemas/OpenAICreateVectorStoreFileBatchRequestWithExtraBody" } } }, @@ -6324,7 +6324,7 @@ "title": "ConversationItemDeletedResource", "description": "Response for deleted conversation item." }, - "OpenaiEmbeddingsRequest": { + "OpenAIEmbeddingsRequestWithExtraBody": { "type": "object", "properties": { "model": { @@ -6347,6 +6347,7 @@ }, "encoding_format": { "type": "string", + "default": "float", "description": "(Optional) The format to return the embeddings in. Can be either \"float\" or \"base64\". Defaults to \"float\"." }, "dimensions": { @@ -6363,7 +6364,8 @@ "model", "input" ], - "title": "OpenaiEmbeddingsRequest" + "title": "OpenAIEmbeddingsRequestWithExtraBody", + "description": "Request parameters for OpenAI-compatible embeddings endpoint." }, "OpenAIEmbeddingData": { "type": "object", @@ -12587,19 +12589,19 @@ "title": "VectorStoreObject", "description": "OpenAI Vector Store object." }, - "OpenaiCreateVectorStoreRequest": { + "OpenAICreateVectorStoreRequestWithExtraBody": { "type": "object", "properties": { "name": { "type": "string", - "description": "A name for the vector store." + "description": "(Optional) A name for the vector store" }, "file_ids": { "type": "array", "items": { "type": "string" }, - "description": "A list of File IDs that the vector store should use. Useful for tools like `file_search` that can access files." + "description": "List of file IDs to include in the vector store" }, "expires_after": { "type": "object", @@ -12625,7 +12627,7 @@ } ] }, - "description": "The expiration policy for a vector store." + "description": "(Optional) Expiration policy for the vector store" }, "chunking_strategy": { "type": "object", @@ -12651,7 +12653,7 @@ } ] }, - "description": "The chunking strategy used to chunk the file(s). If not set, will use the `auto` strategy." + "description": "(Optional) Strategy for splitting files into chunks" }, "metadata": { "type": "object", @@ -12677,23 +12679,25 @@ } ] }, - "description": "Set of 16 key-value pairs that can be attached to an object." + "description": "Set of key-value pairs that can be attached to the vector store" }, "embedding_model": { "type": "string", - "description": "The embedding model to use for this vector store." + "description": "(Optional) The embedding model to use for this vector store" }, "embedding_dimension": { "type": "integer", - "description": "The dimension of the embedding vectors (default: 384)." + "default": 384, + "description": "(Optional) The dimension of the embedding vectors (default: 384)" }, "provider_id": { "type": "string", - "description": "The ID of the provider to use for this vector store." + "description": "(Optional) The ID of the provider to use for this vector store" } }, "additionalProperties": false, - "title": "OpenaiCreateVectorStoreRequest" + "title": "OpenAICreateVectorStoreRequestWithExtraBody", + "description": "Request to create a vector store with extra_body support." }, "OpenaiUpdateVectorStoreRequest": { "type": "object", @@ -12863,7 +12867,7 @@ "title": "VectorStoreChunkingStrategyStaticConfig", "description": "Configuration for static chunking strategy." }, - "OpenaiCreateVectorStoreFileBatchRequest": { + "OpenAICreateVectorStoreFileBatchRequestWithExtraBody": { "type": "object", "properties": { "file_ids": { @@ -12871,7 +12875,7 @@ "items": { "type": "string" }, - "description": "A list of File IDs that the vector store should use." + "description": "A list of File IDs that the vector store should use" }, "attributes": { "type": "object", @@ -12897,18 +12901,19 @@ } ] }, - "description": "(Optional) Key-value attributes to store with the files." + "description": "(Optional) Key-value attributes to store with the files" }, "chunking_strategy": { "$ref": "#/components/schemas/VectorStoreChunkingStrategy", - "description": "(Optional) The chunking strategy used to chunk the file(s). Defaults to auto." + "description": "(Optional) The chunking strategy used to chunk the file(s). Defaults to auto" } }, "additionalProperties": false, "required": [ "file_ids" ], - "title": "OpenaiCreateVectorStoreFileBatchRequest" + "title": "OpenAICreateVectorStoreFileBatchRequestWithExtraBody", + "description": "Request to create a vector store file batch with extra_body support." }, "VectorStoreFileBatchObject": { "type": "object", diff --git a/docs/static/llama-stack-spec.yaml b/docs/static/llama-stack-spec.yaml index ad766d9d5..a66ceede8 100644 --- a/docs/static/llama-stack-spec.yaml +++ b/docs/static/llama-stack-spec.yaml @@ -617,7 +617,7 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/OpenaiEmbeddingsRequest' + $ref: '#/components/schemas/OpenAIEmbeddingsRequestWithExtraBody' required: true deprecated: false /v1/files: @@ -2413,13 +2413,16 @@ paths: tags: - VectorIO summary: Creates a vector store. - description: Creates a vector store. + description: >- + Creates a vector store. + + Generate an OpenAI-compatible vector store with the given parameters. parameters: [] requestBody: content: application/json: schema: - $ref: '#/components/schemas/OpenaiCreateVectorStoreRequest' + $ref: '#/components/schemas/OpenAICreateVectorStoreRequestWithExtraBody' required: true deprecated: false /v1/vector_stores/{vector_store_id}: @@ -2545,7 +2548,11 @@ paths: tags: - VectorIO summary: Create a vector store file batch. - description: Create a vector store file batch. + description: >- + Create a vector store file batch. + + Generate an OpenAI-compatible vector store file batch for the given vector + store. parameters: - name: vector_store_id in: path @@ -2558,7 +2565,7 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/OpenaiCreateVectorStoreFileBatchRequest' + $ref: '#/components/schemas/OpenAICreateVectorStoreFileBatchRequestWithExtraBody' required: true deprecated: false /v1/vector_stores/{vector_store_id}/file_batches/{batch_id}: @@ -4797,7 +4804,7 @@ components: - deleted title: ConversationItemDeletedResource description: Response for deleted conversation item. - OpenaiEmbeddingsRequest: + OpenAIEmbeddingsRequestWithExtraBody: type: object properties: model: @@ -4816,6 +4823,7 @@ components: multiple inputs in a single request, pass an array of strings. encoding_format: type: string + default: float description: >- (Optional) The format to return the embeddings in. Can be either "float" or "base64". Defaults to "float". @@ -4833,7 +4841,9 @@ components: required: - model - input - title: OpenaiEmbeddingsRequest + title: OpenAIEmbeddingsRequestWithExtraBody + description: >- + Request parameters for OpenAI-compatible embeddings endpoint. OpenAIEmbeddingData: type: object properties: @@ -9612,19 +9622,18 @@ components: - metadata title: VectorStoreObject description: OpenAI Vector Store object. - OpenaiCreateVectorStoreRequest: + "OpenAICreateVectorStoreRequestWithExtraBody": type: object properties: name: type: string - description: A name for the vector store. + description: (Optional) A name for the vector store file_ids: type: array items: type: string description: >- - A list of File IDs that the vector store should use. Useful for tools - like `file_search` that can access files. + List of file IDs to include in the vector store expires_after: type: object additionalProperties: @@ -9636,7 +9645,7 @@ components: - type: array - type: object description: >- - The expiration policy for a vector store. + (Optional) Expiration policy for the vector store chunking_strategy: type: object additionalProperties: @@ -9648,8 +9657,7 @@ components: - type: array - type: object description: >- - The chunking strategy used to chunk the file(s). If not set, will use - the `auto` strategy. + (Optional) Strategy for splitting files into chunks metadata: type: object additionalProperties: @@ -9661,21 +9669,25 @@ components: - type: array - type: object description: >- - Set of 16 key-value pairs that can be attached to an object. + Set of key-value pairs that can be attached to the vector store embedding_model: type: string description: >- - The embedding model to use for this vector store. + (Optional) The embedding model to use for this vector store embedding_dimension: type: integer + default: 384 description: >- - The dimension of the embedding vectors (default: 384). + (Optional) The dimension of the embedding vectors (default: 384) provider_id: type: string description: >- - The ID of the provider to use for this vector store. + (Optional) The ID of the provider to use for this vector store additionalProperties: false - title: OpenaiCreateVectorStoreRequest + title: >- + OpenAICreateVectorStoreRequestWithExtraBody + description: >- + Request to create a vector store with extra_body support. OpenaiUpdateVectorStoreRequest: type: object properties: @@ -9796,7 +9808,7 @@ components: title: VectorStoreChunkingStrategyStaticConfig description: >- Configuration for static chunking strategy. - OpenaiCreateVectorStoreFileBatchRequest: + "OpenAICreateVectorStoreFileBatchRequestWithExtraBody": type: object properties: file_ids: @@ -9804,7 +9816,7 @@ components: items: type: string description: >- - A list of File IDs that the vector store should use. + A list of File IDs that the vector store should use attributes: type: object additionalProperties: @@ -9816,16 +9828,19 @@ components: - type: array - type: object description: >- - (Optional) Key-value attributes to store with the files. + (Optional) Key-value attributes to store with the files chunking_strategy: $ref: '#/components/schemas/VectorStoreChunkingStrategy' description: >- (Optional) The chunking strategy used to chunk the file(s). Defaults to - auto. + auto additionalProperties: false required: - file_ids - title: OpenaiCreateVectorStoreFileBatchRequest + title: >- + OpenAICreateVectorStoreFileBatchRequestWithExtraBody + description: >- + Request to create a vector store file batch with extra_body support. VectorStoreFileBatchObject: type: object properties: diff --git a/docs/static/stainless-llama-stack-spec.html b/docs/static/stainless-llama-stack-spec.html index 657705a1c..717352b35 100644 --- a/docs/static/stainless-llama-stack-spec.html +++ b/docs/static/stainless-llama-stack-spec.html @@ -765,7 +765,7 @@ "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/OpenaiEmbeddingsRequest" + "$ref": "#/components/schemas/OpenAIEmbeddingsRequestWithExtraBody" } } }, @@ -3170,13 +3170,13 @@ "VectorIO" ], "summary": "Creates a vector store.", - "description": "Creates a vector store.", + "description": "Creates a vector store.\nGenerate an OpenAI-compatible vector store with the given parameters.", "parameters": [], "requestBody": { "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/OpenaiCreateVectorStoreRequest" + "$ref": "#/components/schemas/OpenAICreateVectorStoreRequestWithExtraBody" } } }, @@ -3356,7 +3356,7 @@ "VectorIO" ], "summary": "Create a vector store file batch.", - "description": "Create a vector store file batch.", + "description": "Create a vector store file batch.\nGenerate an OpenAI-compatible vector store file batch for the given vector store.", "parameters": [ { "name": "vector_store_id", @@ -3372,7 +3372,7 @@ "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/OpenaiCreateVectorStoreFileBatchRequest" + "$ref": "#/components/schemas/OpenAICreateVectorStoreFileBatchRequestWithExtraBody" } } }, @@ -8333,7 +8333,7 @@ "title": "ConversationItemDeletedResource", "description": "Response for deleted conversation item." }, - "OpenaiEmbeddingsRequest": { + "OpenAIEmbeddingsRequestWithExtraBody": { "type": "object", "properties": { "model": { @@ -8356,6 +8356,7 @@ }, "encoding_format": { "type": "string", + "default": "float", "description": "(Optional) The format to return the embeddings in. Can be either \"float\" or \"base64\". Defaults to \"float\"." }, "dimensions": { @@ -8372,7 +8373,8 @@ "model", "input" ], - "title": "OpenaiEmbeddingsRequest" + "title": "OpenAIEmbeddingsRequestWithExtraBody", + "description": "Request parameters for OpenAI-compatible embeddings endpoint." }, "OpenAIEmbeddingData": { "type": "object", @@ -14596,19 +14598,19 @@ "title": "VectorStoreObject", "description": "OpenAI Vector Store object." }, - "OpenaiCreateVectorStoreRequest": { + "OpenAICreateVectorStoreRequestWithExtraBody": { "type": "object", "properties": { "name": { "type": "string", - "description": "A name for the vector store." + "description": "(Optional) A name for the vector store" }, "file_ids": { "type": "array", "items": { "type": "string" }, - "description": "A list of File IDs that the vector store should use. Useful for tools like `file_search` that can access files." + "description": "List of file IDs to include in the vector store" }, "expires_after": { "type": "object", @@ -14634,7 +14636,7 @@ } ] }, - "description": "The expiration policy for a vector store." + "description": "(Optional) Expiration policy for the vector store" }, "chunking_strategy": { "type": "object", @@ -14660,7 +14662,7 @@ } ] }, - "description": "The chunking strategy used to chunk the file(s). If not set, will use the `auto` strategy." + "description": "(Optional) Strategy for splitting files into chunks" }, "metadata": { "type": "object", @@ -14686,23 +14688,25 @@ } ] }, - "description": "Set of 16 key-value pairs that can be attached to an object." + "description": "Set of key-value pairs that can be attached to the vector store" }, "embedding_model": { "type": "string", - "description": "The embedding model to use for this vector store." + "description": "(Optional) The embedding model to use for this vector store" }, "embedding_dimension": { "type": "integer", - "description": "The dimension of the embedding vectors (default: 384)." + "default": 384, + "description": "(Optional) The dimension of the embedding vectors (default: 384)" }, "provider_id": { "type": "string", - "description": "The ID of the provider to use for this vector store." + "description": "(Optional) The ID of the provider to use for this vector store" } }, "additionalProperties": false, - "title": "OpenaiCreateVectorStoreRequest" + "title": "OpenAICreateVectorStoreRequestWithExtraBody", + "description": "Request to create a vector store with extra_body support." }, "OpenaiUpdateVectorStoreRequest": { "type": "object", @@ -14872,7 +14876,7 @@ "title": "VectorStoreChunkingStrategyStaticConfig", "description": "Configuration for static chunking strategy." }, - "OpenaiCreateVectorStoreFileBatchRequest": { + "OpenAICreateVectorStoreFileBatchRequestWithExtraBody": { "type": "object", "properties": { "file_ids": { @@ -14880,7 +14884,7 @@ "items": { "type": "string" }, - "description": "A list of File IDs that the vector store should use." + "description": "A list of File IDs that the vector store should use" }, "attributes": { "type": "object", @@ -14906,18 +14910,19 @@ } ] }, - "description": "(Optional) Key-value attributes to store with the files." + "description": "(Optional) Key-value attributes to store with the files" }, "chunking_strategy": { "$ref": "#/components/schemas/VectorStoreChunkingStrategy", - "description": "(Optional) The chunking strategy used to chunk the file(s). Defaults to auto." + "description": "(Optional) The chunking strategy used to chunk the file(s). Defaults to auto" } }, "additionalProperties": false, "required": [ "file_ids" ], - "title": "OpenaiCreateVectorStoreFileBatchRequest" + "title": "OpenAICreateVectorStoreFileBatchRequestWithExtraBody", + "description": "Request to create a vector store file batch with extra_body support." }, "VectorStoreFileBatchObject": { "type": "object", diff --git a/docs/static/stainless-llama-stack-spec.yaml b/docs/static/stainless-llama-stack-spec.yaml index d5dcbe7d3..4a5f7c8e0 100644 --- a/docs/static/stainless-llama-stack-spec.yaml +++ b/docs/static/stainless-llama-stack-spec.yaml @@ -620,7 +620,7 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/OpenaiEmbeddingsRequest' + $ref: '#/components/schemas/OpenAIEmbeddingsRequestWithExtraBody' required: true deprecated: false /v1/files: @@ -2416,13 +2416,16 @@ paths: tags: - VectorIO summary: Creates a vector store. - description: Creates a vector store. + description: >- + Creates a vector store. + + Generate an OpenAI-compatible vector store with the given parameters. parameters: [] requestBody: content: application/json: schema: - $ref: '#/components/schemas/OpenaiCreateVectorStoreRequest' + $ref: '#/components/schemas/OpenAICreateVectorStoreRequestWithExtraBody' required: true deprecated: false /v1/vector_stores/{vector_store_id}: @@ -2548,7 +2551,11 @@ paths: tags: - VectorIO summary: Create a vector store file batch. - description: Create a vector store file batch. + description: >- + Create a vector store file batch. + + Generate an OpenAI-compatible vector store file batch for the given vector + store. parameters: - name: vector_store_id in: path @@ -2561,7 +2568,7 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/OpenaiCreateVectorStoreFileBatchRequest' + $ref: '#/components/schemas/OpenAICreateVectorStoreFileBatchRequestWithExtraBody' required: true deprecated: false /v1/vector_stores/{vector_store_id}/file_batches/{batch_id}: @@ -6242,7 +6249,7 @@ components: - deleted title: ConversationItemDeletedResource description: Response for deleted conversation item. - OpenaiEmbeddingsRequest: + OpenAIEmbeddingsRequestWithExtraBody: type: object properties: model: @@ -6261,6 +6268,7 @@ components: multiple inputs in a single request, pass an array of strings. encoding_format: type: string + default: float description: >- (Optional) The format to return the embeddings in. Can be either "float" or "base64". Defaults to "float". @@ -6278,7 +6286,9 @@ components: required: - model - input - title: OpenaiEmbeddingsRequest + title: OpenAIEmbeddingsRequestWithExtraBody + description: >- + Request parameters for OpenAI-compatible embeddings endpoint. OpenAIEmbeddingData: type: object properties: @@ -11057,19 +11067,18 @@ components: - metadata title: VectorStoreObject description: OpenAI Vector Store object. - OpenaiCreateVectorStoreRequest: + "OpenAICreateVectorStoreRequestWithExtraBody": type: object properties: name: type: string - description: A name for the vector store. + description: (Optional) A name for the vector store file_ids: type: array items: type: string description: >- - A list of File IDs that the vector store should use. Useful for tools - like `file_search` that can access files. + List of file IDs to include in the vector store expires_after: type: object additionalProperties: @@ -11081,7 +11090,7 @@ components: - type: array - type: object description: >- - The expiration policy for a vector store. + (Optional) Expiration policy for the vector store chunking_strategy: type: object additionalProperties: @@ -11093,8 +11102,7 @@ components: - type: array - type: object description: >- - The chunking strategy used to chunk the file(s). If not set, will use - the `auto` strategy. + (Optional) Strategy for splitting files into chunks metadata: type: object additionalProperties: @@ -11106,21 +11114,25 @@ components: - type: array - type: object description: >- - Set of 16 key-value pairs that can be attached to an object. + Set of key-value pairs that can be attached to the vector store embedding_model: type: string description: >- - The embedding model to use for this vector store. + (Optional) The embedding model to use for this vector store embedding_dimension: type: integer + default: 384 description: >- - The dimension of the embedding vectors (default: 384). + (Optional) The dimension of the embedding vectors (default: 384) provider_id: type: string description: >- - The ID of the provider to use for this vector store. + (Optional) The ID of the provider to use for this vector store additionalProperties: false - title: OpenaiCreateVectorStoreRequest + title: >- + OpenAICreateVectorStoreRequestWithExtraBody + description: >- + Request to create a vector store with extra_body support. OpenaiUpdateVectorStoreRequest: type: object properties: @@ -11241,7 +11253,7 @@ components: title: VectorStoreChunkingStrategyStaticConfig description: >- Configuration for static chunking strategy. - OpenaiCreateVectorStoreFileBatchRequest: + "OpenAICreateVectorStoreFileBatchRequestWithExtraBody": type: object properties: file_ids: @@ -11249,7 +11261,7 @@ components: items: type: string description: >- - A list of File IDs that the vector store should use. + A list of File IDs that the vector store should use attributes: type: object additionalProperties: @@ -11261,16 +11273,19 @@ components: - type: array - type: object description: >- - (Optional) Key-value attributes to store with the files. + (Optional) Key-value attributes to store with the files chunking_strategy: $ref: '#/components/schemas/VectorStoreChunkingStrategy' description: >- (Optional) The chunking strategy used to chunk the file(s). Defaults to - auto. + auto additionalProperties: false required: - file_ids - title: OpenaiCreateVectorStoreFileBatchRequest + title: >- + OpenAICreateVectorStoreFileBatchRequestWithExtraBody + description: >- + Request to create a vector store file batch with extra_body support. VectorStoreFileBatchObject: type: object properties: diff --git a/llama_stack/apis/inference/inference.py b/llama_stack/apis/inference/inference.py index 3c1aa1f63..027246470 100644 --- a/llama_stack/apis/inference/inference.py +++ b/llama_stack/apis/inference/inference.py @@ -1140,6 +1140,25 @@ class OpenAIChatCompletionRequestWithExtraBody(BaseModel, extra="allow"): user: str | None = None +# extra_body can be accessed via .model_extra +@json_schema_type +class OpenAIEmbeddingsRequestWithExtraBody(BaseModel, extra="allow"): + """Request parameters for OpenAI-compatible embeddings endpoint. + + :param model: The identifier of the model to use. The model must be an embedding model registered with Llama Stack and available via the /models endpoint. + :param input: Input text to embed, encoded as a string or array of strings. To embed multiple inputs in a single request, pass an array of strings. + :param encoding_format: (Optional) The format to return the embeddings in. Can be either "float" or "base64". Defaults to "float". + :param dimensions: (Optional) The number of dimensions the resulting output embeddings should have. Only supported in text-embedding-3 and later models. + :param user: (Optional) A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. + """ + + model: str + input: str | list[str] + encoding_format: str | None = "float" + dimensions: int | None = None + user: str | None = None + + @runtime_checkable @trace_protocol class InferenceProvider(Protocol): @@ -1200,21 +1219,11 @@ class InferenceProvider(Protocol): @webmethod(route="/embeddings", method="POST", level=LLAMA_STACK_API_V1) async def openai_embeddings( self, - model: str, - input: str | list[str], - encoding_format: str | None = "float", - dimensions: int | None = None, - user: str | None = None, + params: Annotated[OpenAIEmbeddingsRequestWithExtraBody, Body(...)], ) -> OpenAIEmbeddingsResponse: """Create embeddings. Generate OpenAI-compatible embeddings for the given input using the specified model. - - :param model: The identifier of the model to use. The model must be an embedding model registered with Llama Stack and available via the /models endpoint. - :param input: Input text to embed, encoded as a string or array of strings. To embed multiple inputs in a single request, pass an array of strings. - :param encoding_format: (Optional) The format to return the embeddings in. Can be either "float" or "base64". Defaults to "float". - :param dimensions: (Optional) The number of dimensions the resulting output embeddings should have. Only supported in text-embedding-3 and later models. - :param user: (Optional) A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. :returns: An OpenAIEmbeddingsResponse containing the embeddings. """ ... diff --git a/llama_stack/apis/vector_io/vector_io.py b/llama_stack/apis/vector_io/vector_io.py index 238889099..4a13f0ebf 100644 --- a/llama_stack/apis/vector_io/vector_io.py +++ b/llama_stack/apis/vector_io/vector_io.py @@ -11,6 +11,7 @@ import uuid from typing import Annotated, Any, Literal, Protocol, runtime_checkable +from fastapi import Body from pydantic import BaseModel, Field from llama_stack.apis.inference import InterleavedContent @@ -466,6 +467,46 @@ class VectorStoreFilesListInBatchResponse(BaseModel): has_more: bool = False +# extra_body can be accessed via .model_extra +@json_schema_type +class OpenAICreateVectorStoreRequestWithExtraBody(BaseModel, extra="allow"): + """Request to create a vector store with extra_body support. + + :param name: (Optional) A name for the vector store + :param file_ids: List of file IDs to include in the vector store + :param expires_after: (Optional) Expiration policy for the vector store + :param chunking_strategy: (Optional) Strategy for splitting files into chunks + :param metadata: Set of key-value pairs that can be attached to the vector store + :param embedding_model: (Optional) The embedding model to use for this vector store + :param embedding_dimension: (Optional) The dimension of the embedding vectors (default: 384) + :param provider_id: (Optional) The ID of the provider to use for this vector store + """ + + name: str | None = None + file_ids: list[str] | None = None + expires_after: dict[str, Any] | None = None + chunking_strategy: dict[str, Any] | None = None + metadata: dict[str, Any] | None = None + embedding_model: str | None = None + embedding_dimension: int | None = 384 + provider_id: str | None = None + + +# extra_body can be accessed via .model_extra +@json_schema_type +class OpenAICreateVectorStoreFileBatchRequestWithExtraBody(BaseModel, extra="allow"): + """Request to create a vector store file batch with extra_body support. + + :param file_ids: A list of File IDs that the vector store should use + :param attributes: (Optional) Key-value attributes to store with the files + :param chunking_strategy: (Optional) The chunking strategy used to chunk the file(s). Defaults to auto + """ + + file_ids: list[str] + attributes: dict[str, Any] | None = None + chunking_strategy: VectorStoreChunkingStrategy | None = None + + class VectorDBStore(Protocol): def get_vector_db(self, vector_db_id: str) -> VectorDB | None: ... @@ -516,25 +557,11 @@ class VectorIO(Protocol): @webmethod(route="/vector_stores", method="POST", level=LLAMA_STACK_API_V1) async def openai_create_vector_store( self, - name: str | None = None, - file_ids: list[str] | None = None, - expires_after: dict[str, Any] | None = None, - chunking_strategy: dict[str, Any] | None = None, - metadata: dict[str, Any] | None = None, - embedding_model: str | None = None, - embedding_dimension: int | None = 384, - provider_id: str | None = None, + params: Annotated[OpenAICreateVectorStoreRequestWithExtraBody, Body(...)], ) -> VectorStoreObject: """Creates a vector store. - :param name: A name for the vector store. - :param file_ids: A list of File IDs that the vector store should use. Useful for tools like `file_search` that can access files. - :param expires_after: The expiration policy for a vector store. - :param chunking_strategy: The chunking strategy used to chunk the file(s). If not set, will use the `auto` strategy. - :param metadata: Set of 16 key-value pairs that can be attached to an object. - :param embedding_model: The embedding model to use for this vector store. - :param embedding_dimension: The dimension of the embedding vectors (default: 384). - :param provider_id: The ID of the provider to use for this vector store. + Generate an OpenAI-compatible vector store with the given parameters. :returns: A VectorStoreObject representing the created vector store. """ ... @@ -827,16 +854,12 @@ class VectorIO(Protocol): async def openai_create_vector_store_file_batch( self, vector_store_id: str, - file_ids: list[str], - attributes: dict[str, Any] | None = None, - chunking_strategy: VectorStoreChunkingStrategy | None = None, + params: Annotated[OpenAICreateVectorStoreFileBatchRequestWithExtraBody, Body(...)], ) -> VectorStoreFileBatchObject: """Create a vector store file batch. + Generate an OpenAI-compatible vector store file batch for the given vector store. :param vector_store_id: The ID of the vector store to create the file batch for. - :param file_ids: A list of File IDs that the vector store should use. - :param attributes: (Optional) Key-value attributes to store with the files. - :param chunking_strategy: (Optional) The chunking strategy used to chunk the file(s). Defaults to auto. :returns: A VectorStoreFileBatchObject representing the created file batch. """ ... diff --git a/llama_stack/core/routers/inference.py b/llama_stack/core/routers/inference.py index e16d08371..b20ad44ca 100644 --- a/llama_stack/core/routers/inference.py +++ b/llama_stack/core/routers/inference.py @@ -40,6 +40,7 @@ from llama_stack.apis.inference import ( OpenAICompletion, OpenAICompletionRequestWithExtraBody, OpenAICompletionWithInputMessages, + OpenAIEmbeddingsRequestWithExtraBody, OpenAIEmbeddingsResponse, OpenAIMessageParam, Order, @@ -279,26 +280,18 @@ class InferenceRouter(Inference): async def openai_embeddings( self, - model: str, - input: str | list[str], - encoding_format: str | None = "float", - dimensions: int | None = None, - user: str | None = None, + params: Annotated[OpenAIEmbeddingsRequestWithExtraBody, Body(...)], ) -> OpenAIEmbeddingsResponse: logger.debug( - f"InferenceRouter.openai_embeddings: {model=}, input_type={type(input)}, {encoding_format=}, {dimensions=}", - ) - model_obj = await self._get_model(model, ModelType.embedding) - params = dict( - model=model_obj.identifier, - input=input, - encoding_format=encoding_format, - dimensions=dimensions, - user=user, + f"InferenceRouter.openai_embeddings: model={params.model}, input_type={type(params.input)}, encoding_format={params.encoding_format}, dimensions={params.dimensions}", ) + model_obj = await self._get_model(params.model, ModelType.embedding) + + # Update model to use resolved identifier + params.model = model_obj.identifier provider = await self.routing_table.get_provider_impl(model_obj.identifier) - return await provider.openai_embeddings(**params) + return await provider.openai_embeddings(params) async def list_chat_completions( self, diff --git a/llama_stack/providers/remote/inference/bedrock/bedrock.py b/llama_stack/providers/remote/inference/bedrock/bedrock.py index 057ed758b..d266f9e6f 100644 --- a/llama_stack/providers/remote/inference/bedrock/bedrock.py +++ b/llama_stack/providers/remote/inference/bedrock/bedrock.py @@ -14,6 +14,7 @@ from llama_stack.apis.inference import ( Inference, OpenAIChatCompletionRequestWithExtraBody, OpenAICompletionRequestWithExtraBody, + OpenAIEmbeddingsRequestWithExtraBody, OpenAIEmbeddingsResponse, ) from llama_stack.apis.inference.inference import ( @@ -124,11 +125,7 @@ class BedrockInferenceAdapter( async def openai_embeddings( self, - model: str, - input: str | list[str], - encoding_format: str | None = "float", - dimensions: int | None = None, - user: str | None = None, + params: OpenAIEmbeddingsRequestWithExtraBody, ) -> OpenAIEmbeddingsResponse: raise NotImplementedError() diff --git a/llama_stack/providers/remote/inference/cerebras/cerebras.py b/llama_stack/providers/remote/inference/cerebras/cerebras.py index 0e24af0ee..daf67616b 100644 --- a/llama_stack/providers/remote/inference/cerebras/cerebras.py +++ b/llama_stack/providers/remote/inference/cerebras/cerebras.py @@ -6,7 +6,10 @@ from urllib.parse import urljoin -from llama_stack.apis.inference import OpenAIEmbeddingsResponse +from llama_stack.apis.inference import ( + OpenAIEmbeddingsRequestWithExtraBody, + OpenAIEmbeddingsResponse, +) from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin from .config import CerebrasImplConfig @@ -20,10 +23,6 @@ class CerebrasInferenceAdapter(OpenAIMixin): async def openai_embeddings( self, - model: str, - input: str | list[str], - encoding_format: str | None = "float", - dimensions: int | None = None, - user: str | None = None, + params: OpenAIEmbeddingsRequestWithExtraBody, ) -> OpenAIEmbeddingsResponse: raise NotImplementedError() diff --git a/llama_stack/providers/remote/inference/llama_openai_compat/llama.py b/llama_stack/providers/remote/inference/llama_openai_compat/llama.py index e5fb3c77f..05d6e8cc8 100644 --- a/llama_stack/providers/remote/inference/llama_openai_compat/llama.py +++ b/llama_stack/providers/remote/inference/llama_openai_compat/llama.py @@ -7,6 +7,7 @@ from llama_stack.apis.inference.inference import ( OpenAICompletion, OpenAICompletionRequestWithExtraBody, + OpenAIEmbeddingsRequestWithExtraBody, OpenAIEmbeddingsResponse, ) from llama_stack.log import get_logger @@ -40,10 +41,6 @@ class LlamaCompatInferenceAdapter(OpenAIMixin): async def openai_embeddings( self, - model: str, - input: str | list[str], - encoding_format: str | None = "float", - dimensions: int | None = None, - user: str | None = None, + params: OpenAIEmbeddingsRequestWithExtraBody, ) -> OpenAIEmbeddingsResponse: raise NotImplementedError() diff --git a/llama_stack/providers/remote/inference/nvidia/nvidia.py b/llama_stack/providers/remote/inference/nvidia/nvidia.py index 9d8d1089a..37864b040 100644 --- a/llama_stack/providers/remote/inference/nvidia/nvidia.py +++ b/llama_stack/providers/remote/inference/nvidia/nvidia.py @@ -9,6 +9,7 @@ from openai import NOT_GIVEN from llama_stack.apis.inference import ( OpenAIEmbeddingData, + OpenAIEmbeddingsRequestWithExtraBody, OpenAIEmbeddingsResponse, OpenAIEmbeddingUsage, ) @@ -78,11 +79,7 @@ class NVIDIAInferenceAdapter(OpenAIMixin): async def openai_embeddings( self, - model: str, - input: str | list[str], - encoding_format: str | None = "float", - dimensions: int | None = None, - user: str | None = None, + params: OpenAIEmbeddingsRequestWithExtraBody, ) -> OpenAIEmbeddingsResponse: """ OpenAI-compatible embeddings for NVIDIA NIM. @@ -99,11 +96,11 @@ class NVIDIAInferenceAdapter(OpenAIMixin): ) response = await self.client.embeddings.create( - model=await self._get_provider_model_id(model), - input=input, - encoding_format=encoding_format if encoding_format is not None else NOT_GIVEN, - dimensions=dimensions if dimensions is not None else NOT_GIVEN, - user=user if user is not None else NOT_GIVEN, + model=await self._get_provider_model_id(params.model), + input=params.input, + encoding_format=params.encoding_format if params.encoding_format is not None else NOT_GIVEN, + dimensions=params.dimensions if params.dimensions is not None else NOT_GIVEN, + user=params.user if params.user is not None else NOT_GIVEN, extra_body=extra_body, ) diff --git a/llama_stack/providers/remote/inference/passthrough/passthrough.py b/llama_stack/providers/remote/inference/passthrough/passthrough.py index 11306095b..4d4d4f41d 100644 --- a/llama_stack/providers/remote/inference/passthrough/passthrough.py +++ b/llama_stack/providers/remote/inference/passthrough/passthrough.py @@ -16,6 +16,7 @@ from llama_stack.apis.inference import ( OpenAIChatCompletionRequestWithExtraBody, OpenAICompletion, OpenAICompletionRequestWithExtraBody, + OpenAIEmbeddingsRequestWithExtraBody, OpenAIEmbeddingsResponse, ) from llama_stack.apis.models import Model @@ -69,11 +70,7 @@ class PassthroughInferenceAdapter(Inference): async def openai_embeddings( self, - model: str, - input: str | list[str], - encoding_format: str | None = "float", - dimensions: int | None = None, - user: str | None = None, + params: OpenAIEmbeddingsRequestWithExtraBody, ) -> OpenAIEmbeddingsResponse: raise NotImplementedError() diff --git a/llama_stack/providers/remote/inference/tgi/tgi.py b/llama_stack/providers/remote/inference/tgi/tgi.py index da3205a13..6ae7b2544 100644 --- a/llama_stack/providers/remote/inference/tgi/tgi.py +++ b/llama_stack/providers/remote/inference/tgi/tgi.py @@ -10,7 +10,10 @@ from collections.abc import Iterable from huggingface_hub import AsyncInferenceClient, HfApi from pydantic import SecretStr -from llama_stack.apis.inference import OpenAIEmbeddingsResponse +from llama_stack.apis.inference import ( + OpenAIEmbeddingsRequestWithExtraBody, + OpenAIEmbeddingsResponse, +) from llama_stack.log import get_logger from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin @@ -40,11 +43,7 @@ class _HfAdapter(OpenAIMixin): async def openai_embeddings( self, - model: str, - input: str | list[str], - encoding_format: str | None = "float", - dimensions: int | None = None, - user: str | None = None, + params: OpenAIEmbeddingsRequestWithExtraBody, ) -> OpenAIEmbeddingsResponse: raise NotImplementedError() diff --git a/llama_stack/providers/remote/inference/together/together.py b/llama_stack/providers/remote/inference/together/together.py index e29cccf04..e31ebf7c5 100644 --- a/llama_stack/providers/remote/inference/together/together.py +++ b/llama_stack/providers/remote/inference/together/together.py @@ -11,6 +11,7 @@ from together import AsyncTogether from together.constants import BASE_URL from llama_stack.apis.inference import ( + OpenAIEmbeddingsRequestWithExtraBody, OpenAIEmbeddingsResponse, ) from llama_stack.apis.inference.inference import OpenAIEmbeddingUsage @@ -62,11 +63,7 @@ class TogetherInferenceAdapter(OpenAIMixin, NeedsRequestProviderData): async def openai_embeddings( self, - model: str, - input: str | list[str], - encoding_format: str | None = "float", - dimensions: int | None = None, - user: str | None = None, + params: OpenAIEmbeddingsRequestWithExtraBody, ) -> OpenAIEmbeddingsResponse: """ Together's OpenAI-compatible embeddings endpoint is not compatible with @@ -78,25 +75,27 @@ class TogetherInferenceAdapter(OpenAIMixin, NeedsRequestProviderData): - does not support dimensions param, returns 400 Unrecognized request arguments supplied: dimensions """ # Together support ticket #13332 -> will not fix - if user is not None: + if params.user is not None: raise ValueError("Together's embeddings endpoint does not support user param.") # Together support ticket #13333 -> escalated - if dimensions is not None: + if params.dimensions is not None: raise ValueError("Together's embeddings endpoint does not support dimensions param.") response = await self.client.embeddings.create( - model=await self._get_provider_model_id(model), - input=input, - encoding_format=encoding_format, + model=await self._get_provider_model_id(params.model), + input=params.input, + encoding_format=params.encoding_format, ) - response.model = model # return the user the same model id they provided, avoid exposing the provider model id + response.model = ( + params.model + ) # return the user the same model id they provided, avoid exposing the provider model id # Together support ticket #13330 -> escalated # - togethercomputer/m2-bert-80M-32k-retrieval *does not* return usage information if not hasattr(response, "usage") or response.usage is None: logger.warning( - f"Together's embedding endpoint for {model} did not return usage information, substituting -1s." + f"Together's embedding endpoint for {params.model} did not return usage information, substituting -1s." ) response.usage = OpenAIEmbeddingUsage(prompt_tokens=-1, total_tokens=-1) diff --git a/llama_stack/providers/utils/inference/embedding_mixin.py b/llama_stack/providers/utils/inference/embedding_mixin.py index facc59f65..375943442 100644 --- a/llama_stack/providers/utils/inference/embedding_mixin.py +++ b/llama_stack/providers/utils/inference/embedding_mixin.py @@ -17,6 +17,7 @@ if TYPE_CHECKING: from llama_stack.apis.inference import ( ModelStore, OpenAIEmbeddingData, + OpenAIEmbeddingsRequestWithExtraBody, OpenAIEmbeddingsResponse, OpenAIEmbeddingUsage, ) @@ -32,26 +33,22 @@ class SentenceTransformerEmbeddingMixin: async def openai_embeddings( self, - model: str, - input: str | list[str], - encoding_format: str | None = "float", - dimensions: int | None = None, - user: str | None = None, + params: OpenAIEmbeddingsRequestWithExtraBody, ) -> OpenAIEmbeddingsResponse: # Convert input to list format if it's a single string - input_list = [input] if isinstance(input, str) else input + input_list = [params.input] if isinstance(params.input, str) else params.input if not input_list: raise ValueError("Empty list not supported") # Get the model and generate embeddings - model_obj = await self.model_store.get_model(model) + model_obj = await self.model_store.get_model(params.model) embedding_model = await self._load_sentence_transformer_model(model_obj.provider_resource_id) embeddings = await asyncio.to_thread(embedding_model.encode, input_list, show_progress_bar=False) # Convert embeddings to the requested format data = [] for i, embedding in enumerate(embeddings): - if encoding_format == "base64": + if params.encoding_format == "base64": # Convert float array to base64 string float_bytes = struct.pack(f"{len(embedding)}f", *embedding) embedding_value = base64.b64encode(float_bytes).decode("ascii") @@ -70,7 +67,7 @@ class SentenceTransformerEmbeddingMixin: usage = OpenAIEmbeddingUsage(prompt_tokens=-1, total_tokens=-1) return OpenAIEmbeddingsResponse( data=data, - model=model, + model=params.model, usage=usage, ) diff --git a/llama_stack/providers/utils/inference/litellm_openai_mixin.py b/llama_stack/providers/utils/inference/litellm_openai_mixin.py index d1be1789a..42b89f897 100644 --- a/llama_stack/providers/utils/inference/litellm_openai_mixin.py +++ b/llama_stack/providers/utils/inference/litellm_openai_mixin.py @@ -20,6 +20,7 @@ from llama_stack.apis.inference import ( OpenAICompletion, OpenAICompletionRequestWithExtraBody, OpenAIEmbeddingData, + OpenAIEmbeddingsRequestWithExtraBody, OpenAIEmbeddingsResponse, OpenAIEmbeddingUsage, ToolChoice, @@ -189,16 +190,12 @@ class LiteLLMOpenAIMixin( async def openai_embeddings( self, - model: str, - input: str | list[str], - encoding_format: str | None = "float", - dimensions: int | None = None, - user: str | None = None, + params: OpenAIEmbeddingsRequestWithExtraBody, ) -> OpenAIEmbeddingsResponse: - model_obj = await self.model_store.get_model(model) + model_obj = await self.model_store.get_model(params.model) # Convert input to list if it's a string - input_list = [input] if isinstance(input, str) else input + input_list = [params.input] if isinstance(params.input, str) else params.input # Call litellm embedding function # litellm.drop_params = True @@ -207,11 +204,11 @@ class LiteLLMOpenAIMixin( input=input_list, api_key=self.get_api_key(), api_base=self.api_base, - dimensions=dimensions, + dimensions=params.dimensions, ) # Convert response to OpenAI format - data = b64_encode_openai_embeddings_response(response.data, encoding_format) + data = b64_encode_openai_embeddings_response(response.data, params.encoding_format) usage = OpenAIEmbeddingUsage( prompt_tokens=response["usage"]["prompt_tokens"], diff --git a/llama_stack/providers/utils/inference/openai_mixin.py b/llama_stack/providers/utils/inference/openai_mixin.py index 863ea161c..11c0b6829 100644 --- a/llama_stack/providers/utils/inference/openai_mixin.py +++ b/llama_stack/providers/utils/inference/openai_mixin.py @@ -21,6 +21,7 @@ from llama_stack.apis.inference import ( OpenAICompletion, OpenAICompletionRequestWithExtraBody, OpenAIEmbeddingData, + OpenAIEmbeddingsRequestWithExtraBody, OpenAIEmbeddingsResponse, OpenAIEmbeddingUsage, OpenAIMessageParam, @@ -316,23 +317,27 @@ class OpenAIMixin(NeedsRequestProviderData, ABC, BaseModel): async def openai_embeddings( self, - model: str, - input: str | list[str], - encoding_format: str | None = "float", - dimensions: int | None = None, - user: str | None = None, + params: OpenAIEmbeddingsRequestWithExtraBody, ) -> OpenAIEmbeddingsResponse: """ Direct OpenAI embeddings API call. """ + # Prepare request parameters + request_params = { + "model": await self._get_provider_model_id(params.model), + "input": params.input, + "encoding_format": params.encoding_format if params.encoding_format is not None else NOT_GIVEN, + "dimensions": params.dimensions if params.dimensions is not None else NOT_GIVEN, + "user": params.user if params.user is not None else NOT_GIVEN, + } + + # Add extra_body if present + extra_body = params.model_extra + if extra_body: + request_params["extra_body"] = extra_body + # Call OpenAI embeddings API with properly typed parameters - response = await self.client.embeddings.create( - model=await self._get_provider_model_id(model), - input=input, - encoding_format=encoding_format if encoding_format is not None else NOT_GIVEN, - dimensions=dimensions if dimensions is not None else NOT_GIVEN, - user=user if user is not None else NOT_GIVEN, - ) + response = await self.client.embeddings.create(**request_params) data = [] for i, embedding_data in enumerate(response.data): @@ -350,7 +355,7 @@ class OpenAIMixin(NeedsRequestProviderData, ABC, BaseModel): return OpenAIEmbeddingsResponse( data=data, - model=model, + model=params.model, usage=usage, ) diff --git a/llama_stack/providers/utils/memory/openai_vector_store_mixin.py b/llama_stack/providers/utils/memory/openai_vector_store_mixin.py index ddfef9ba2..2d8d14512 100644 --- a/llama_stack/providers/utils/memory/openai_vector_store_mixin.py +++ b/llama_stack/providers/utils/memory/openai_vector_store_mixin.py @@ -19,6 +19,8 @@ from llama_stack.apis.files import Files, OpenAIFileObject from llama_stack.apis.vector_dbs import VectorDB from llama_stack.apis.vector_io import ( Chunk, + OpenAICreateVectorStoreFileBatchRequestWithExtraBody, + OpenAICreateVectorStoreRequestWithExtraBody, QueryChunksResponse, SearchRankingOptions, VectorStoreChunkingStrategy, @@ -340,39 +342,37 @@ class OpenAIVectorStoreMixin(ABC): async def openai_create_vector_store( self, - name: str | None = None, - file_ids: list[str] | None = None, - expires_after: dict[str, Any] | None = None, - chunking_strategy: dict[str, Any] | None = None, - metadata: dict[str, Any] | None = None, - embedding_model: str | None = None, - embedding_dimension: int | None = 384, - provider_id: str | None = None, - provider_vector_db_id: str | None = None, + params: OpenAICreateVectorStoreRequestWithExtraBody, ) -> VectorStoreObject: """Creates a vector store.""" created_at = int(time.time()) + + # Extract provider_vector_db_id from extra_body if present + provider_vector_db_id = None + if params.model_extra and "provider_vector_db_id" in params.model_extra: + provider_vector_db_id = params.model_extra["provider_vector_db_id"] + # Derive the canonical vector_db_id (allow override, else generate) vector_db_id = provider_vector_db_id or generate_object_id("vector_store", lambda: f"vs_{uuid.uuid4()}") - if provider_id is None: + if params.provider_id is None: raise ValueError("Provider ID is required") - if embedding_model is None: + if params.embedding_model is None: raise ValueError("Embedding model is required") # Embedding dimension is required (defaulted to 384 if not provided) - if embedding_dimension is None: + if params.embedding_dimension is None: raise ValueError("Embedding dimension is required") # Register the VectorDB backing this vector store vector_db = VectorDB( identifier=vector_db_id, - embedding_dimension=embedding_dimension, - embedding_model=embedding_model, - provider_id=provider_id, + embedding_dimension=params.embedding_dimension, + embedding_model=params.embedding_model, + provider_id=params.provider_id, provider_resource_id=vector_db_id, - vector_db_name=name, + vector_db_name=params.name, ) await self.register_vector_db(vector_db) @@ -391,21 +391,21 @@ class OpenAIVectorStoreMixin(ABC): "id": vector_db_id, "object": "vector_store", "created_at": created_at, - "name": name, + "name": params.name, "usage_bytes": 0, "file_counts": file_counts.model_dump(), "status": status, - "expires_after": expires_after, + "expires_after": params.expires_after, "expires_at": None, "last_active_at": created_at, "file_ids": [], - "chunking_strategy": chunking_strategy, + "chunking_strategy": params.chunking_strategy, } # Add provider information to metadata if provided - metadata = metadata or {} - if provider_id: - metadata["provider_id"] = provider_id + metadata = params.metadata or {} + if params.provider_id: + metadata["provider_id"] = params.provider_id if provider_vector_db_id: metadata["provider_vector_db_id"] = provider_vector_db_id store_info["metadata"] = metadata @@ -417,7 +417,7 @@ class OpenAIVectorStoreMixin(ABC): self.openai_vector_stores[vector_db_id] = store_info # Now that our vector store is created, attach any files that were provided - file_ids = file_ids or [] + file_ids = params.file_ids or [] tasks = [self.openai_attach_file_to_vector_store(vector_db_id, file_id) for file_id in file_ids] await asyncio.gather(*tasks) @@ -976,15 +976,13 @@ class OpenAIVectorStoreMixin(ABC): async def openai_create_vector_store_file_batch( self, vector_store_id: str, - file_ids: list[str], - attributes: dict[str, Any] | None = None, - chunking_strategy: VectorStoreChunkingStrategy | None = None, + params: OpenAICreateVectorStoreFileBatchRequestWithExtraBody, ) -> VectorStoreFileBatchObject: """Create a vector store file batch.""" if vector_store_id not in self.openai_vector_stores: raise VectorStoreNotFoundError(vector_store_id) - chunking_strategy = chunking_strategy or VectorStoreChunkingStrategyAuto() + chunking_strategy = params.chunking_strategy or VectorStoreChunkingStrategyAuto() created_at = int(time.time()) batch_id = generate_object_id("vector_store_file_batch", lambda: f"batch_{uuid.uuid4()}") @@ -996,8 +994,8 @@ class OpenAIVectorStoreMixin(ABC): completed=0, cancelled=0, failed=0, - in_progress=len(file_ids), - total=len(file_ids), + in_progress=len(params.file_ids), + total=len(params.file_ids), ) # Create batch object immediately with in_progress status @@ -1011,8 +1009,8 @@ class OpenAIVectorStoreMixin(ABC): batch_info = { **batch_object.model_dump(), - "file_ids": file_ids, - "attributes": attributes, + "file_ids": params.file_ids, + "attributes": params.attributes, "chunking_strategy": chunking_strategy.model_dump(), "expires_at": expires_at, }