feat(api)!: support extra_body to embeddings and vector_stores APIs (#3794)

Applies the same pattern from
https://github.com/llamastack/llama-stack/pull/3777 to embeddings and
vector_stores.create() endpoints.

This should _not_ be a breaking change since (a) our tests were already
passing the `extra_body` parameter through to the backend, but (b) the
backend wasn't extracting those parameters correctly. This PR fixes that.

Updated APIs: `openai_embeddings()`, `openai_create_vector_store()`, `openai_create_vector_store_file_batch()`
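
As a usage illustration, here is a minimal end-to-end sketch of the `extra_body` pattern (the base URL, API key, and embedding model name below are illustrative assumptions, not values from this PR):

```python
# Server side: a Pydantic model declared with extra="allow" retains unknown
# request fields and exposes them via .model_extra; this is the mechanism the
# updated request models use to carry extra_body parameters.
from pydantic import BaseModel


class CreateVectorStoreRequest(BaseModel, extra="allow"):
    name: str | None = None


req = CreateVectorStoreRequest(name="my-docs", embedding_model="all-MiniLM-L6-v2")
print(req.model_extra)  # {'embedding_model': 'all-MiniLM-L6-v2'}

# Client side: the OpenAI Python SDK forwards extra_body keys verbatim in the
# JSON request body, so Llama Stack-specific fields ride alongside the
# standard OpenAI parameters.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8321/v1", api_key="none")
vector_store = client.vector_stores.create(
    name="my-docs",
    extra_body={"embedding_model": "all-MiniLM-L6-v2", "embedding_dimension": 384},
)
```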
Ashwin Bharambe 2025-10-12 19:01:52 -07:00 committed by GitHub
parent 3bb6ef351b
commit ecc8a554d2
26 changed files with 451 additions and 426 deletions


@@ -1662,7 +1662,7 @@
 "content": {
 "application/json": {
 "schema": {
-"$ref": "#/components/schemas/OpenaiEmbeddingsRequest"
+"$ref": "#/components/schemas/OpenAIEmbeddingsRequestWithExtraBody"
 }
 }
 },
@@ -2436,13 +2436,13 @@
 "VectorIO"
 ],
 "summary": "Creates a vector store.",
-"description": "Creates a vector store.",
+"description": "Creates a vector store.\nGenerate an OpenAI-compatible vector store with the given parameters.",
 "parameters": [],
 "requestBody": {
 "content": {
 "application/json": {
 "schema": {
-"$ref": "#/components/schemas/OpenaiCreateVectorStoreRequest"
+"$ref": "#/components/schemas/OpenAICreateVectorStoreRequestWithExtraBody"
 }
 }
 },
@@ -2622,7 +2622,7 @@
 "VectorIO"
 ],
 "summary": "Create a vector store file batch.",
-"description": "Create a vector store file batch.",
+"description": "Create a vector store file batch.\nGenerate an OpenAI-compatible vector store file batch for the given vector store.",
 "parameters": [
 {
 "name": "vector_store_id",
@@ -2638,7 +2638,7 @@
 "content": {
 "application/json": {
 "schema": {
-"$ref": "#/components/schemas/OpenaiCreateVectorStoreFileBatchRequest"
+"$ref": "#/components/schemas/OpenAICreateVectorStoreFileBatchRequestWithExtraBody"
 }
 }
 },
@@ -8174,7 +8174,7 @@
 "title": "OpenAICompletionChoice",
 "description": "A choice from an OpenAI-compatible completion response."
 },
-"OpenaiEmbeddingsRequest": {
+"OpenAIEmbeddingsRequestWithExtraBody": {
 "type": "object",
 "properties": {
 "model": {
@@ -8197,6 +8197,7 @@
 },
 "encoding_format": {
 "type": "string",
+"default": "float",
 "description": "(Optional) The format to return the embeddings in. Can be either \"float\" or \"base64\". Defaults to \"float\"."
 },
 "dimensions": {
@@ -8213,7 +8214,8 @@
 "model",
 "input"
 ],
-"title": "OpenaiEmbeddingsRequest"
+"title": "OpenAIEmbeddingsRequestWithExtraBody",
+"description": "Request parameters for OpenAI-compatible embeddings endpoint."
 },
 "OpenAIEmbeddingData": {
 "type": "object",
@@ -12061,19 +12063,19 @@
 "title": "VectorStoreObject",
 "description": "OpenAI Vector Store object."
 },
-"OpenaiCreateVectorStoreRequest": {
+"OpenAICreateVectorStoreRequestWithExtraBody": {
 "type": "object",
 "properties": {
 "name": {
 "type": "string",
-"description": "A name for the vector store."
+"description": "(Optional) A name for the vector store"
 },
 "file_ids": {
 "type": "array",
 "items": {
 "type": "string"
 },
-"description": "A list of File IDs that the vector store should use. Useful for tools like `file_search` that can access files."
+"description": "List of file IDs to include in the vector store"
 },
 "expires_after": {
 "type": "object",
@@ -12099,7 +12101,7 @@
 }
 ]
 },
-"description": "The expiration policy for a vector store."
+"description": "(Optional) Expiration policy for the vector store"
 },
 "chunking_strategy": {
 "type": "object",
@@ -12125,7 +12127,7 @@
 }
 ]
 },
-"description": "The chunking strategy used to chunk the file(s). If not set, will use the `auto` strategy."
+"description": "(Optional) Strategy for splitting files into chunks"
 },
 "metadata": {
 "type": "object",
@@ -12151,23 +12153,12 @@
 }
 ]
 },
-"description": "Set of 16 key-value pairs that can be attached to an object."
-},
-"embedding_model": {
-"type": "string",
-"description": "The embedding model to use for this vector store."
-},
-"embedding_dimension": {
-"type": "integer",
-"description": "The dimension of the embedding vectors (default: 384)."
-},
-"provider_id": {
-"type": "string",
-"description": "The ID of the provider to use for this vector store."
+"description": "Set of key-value pairs that can be attached to the vector store"
 }
 },
 "additionalProperties": false,
-"title": "OpenaiCreateVectorStoreRequest"
+"title": "OpenAICreateVectorStoreRequestWithExtraBody",
+"description": "Request to create a vector store with extra_body support."
 },
 "OpenaiUpdateVectorStoreRequest": {
 "type": "object",
@@ -12337,7 +12328,7 @@
 "title": "VectorStoreChunkingStrategyStaticConfig",
 "description": "Configuration for static chunking strategy."
 },
-"OpenaiCreateVectorStoreFileBatchRequest": {
+"OpenAICreateVectorStoreFileBatchRequestWithExtraBody": {
 "type": "object",
 "properties": {
 "file_ids": {
@@ -12345,7 +12336,7 @@
 "items": {
 "type": "string"
 },
-"description": "A list of File IDs that the vector store should use."
+"description": "A list of File IDs that the vector store should use"
 },
 "attributes": {
 "type": "object",
@@ -12371,18 +12362,19 @@
 }
 ]
 },
-"description": "(Optional) Key-value attributes to store with the files."
+"description": "(Optional) Key-value attributes to store with the files"
 },
 "chunking_strategy": {
 "$ref": "#/components/schemas/VectorStoreChunkingStrategy",
-"description": "(Optional) The chunking strategy used to chunk the file(s). Defaults to auto."
+"description": "(Optional) The chunking strategy used to chunk the file(s). Defaults to auto"
 }
 },
 "additionalProperties": false,
 "required": [
 "file_ids"
 ],
-"title": "OpenaiCreateVectorStoreFileBatchRequest"
+"title": "OpenAICreateVectorStoreFileBatchRequestWithExtraBody",
+"description": "Request to create a vector store file batch with extra_body support."
 },
 "VectorStoreFileBatchObject": {
 "type": "object",


@@ -1203,7 +1203,7 @@ paths:
 content:
 application/json:
 schema:
-$ref: '#/components/schemas/OpenaiEmbeddingsRequest'
+$ref: '#/components/schemas/OpenAIEmbeddingsRequestWithExtraBody'
 required: true
 deprecated: true
 /v1/openai/v1/files:
@@ -1792,13 +1792,16 @@ paths:
 tags:
 - VectorIO
 summary: Creates a vector store.
-description: Creates a vector store.
+description: >-
+  Creates a vector store.
+
+  Generate an OpenAI-compatible vector store with the given parameters.
 parameters: []
 requestBody:
 content:
 application/json:
 schema:
-$ref: '#/components/schemas/OpenaiCreateVectorStoreRequest'
+$ref: '#/components/schemas/OpenAICreateVectorStoreRequestWithExtraBody'
 required: true
 deprecated: true
 /v1/openai/v1/vector_stores/{vector_store_id}:
@@ -1924,7 +1927,11 @@ paths:
 tags:
 - VectorIO
 summary: Create a vector store file batch.
-description: Create a vector store file batch.
+description: >-
+  Create a vector store file batch.
+
+  Generate an OpenAI-compatible vector store file batch for the given vector
+  store.
 parameters:
 - name: vector_store_id
 in: path
@@ -1937,7 +1944,7 @@ paths:
 content:
 application/json:
 schema:
-$ref: '#/components/schemas/OpenaiCreateVectorStoreFileBatchRequest'
+$ref: '#/components/schemas/OpenAICreateVectorStoreFileBatchRequestWithExtraBody'
 required: true
 deprecated: true
 /v1/openai/v1/vector_stores/{vector_store_id}/file_batches/{batch_id}:
@@ -6035,7 +6042,7 @@ components:
 title: OpenAICompletionChoice
 description: >-
   A choice from an OpenAI-compatible completion response.
-OpenaiEmbeddingsRequest:
+OpenAIEmbeddingsRequestWithExtraBody:
 type: object
 properties:
 model:
@@ -6054,6 +6061,7 @@ components:
   multiple inputs in a single request, pass an array of strings.
 encoding_format:
 type: string
+default: float
 description: >-
   (Optional) The format to return the embeddings in. Can be either "float"
   or "base64". Defaults to "float".
@@ -6071,7 +6079,9 @@ components:
 required:
 - model
 - input
-title: OpenaiEmbeddingsRequest
+title: OpenAIEmbeddingsRequestWithExtraBody
+description: >-
+  Request parameters for OpenAI-compatible embeddings endpoint.
 OpenAIEmbeddingData:
 type: object
 properties:
@@ -9147,19 +9157,18 @@ components:
 - metadata
 title: VectorStoreObject
 description: OpenAI Vector Store object.
-OpenaiCreateVectorStoreRequest:
+"OpenAICreateVectorStoreRequestWithExtraBody":
 type: object
 properties:
 name:
 type: string
-description: A name for the vector store.
+description: (Optional) A name for the vector store
 file_ids:
 type: array
 items:
 type: string
 description: >-
-  A list of File IDs that the vector store should use. Useful for tools
-  like `file_search` that can access files.
+  List of file IDs to include in the vector store
 expires_after:
 type: object
 additionalProperties:
@@ -9171,7 +9180,7 @@ components:
 - type: array
 - type: object
 description: >-
-  The expiration policy for a vector store.
+  (Optional) Expiration policy for the vector store
 chunking_strategy:
 type: object
 additionalProperties:
@@ -9183,8 +9192,7 @@ components:
 - type: array
 - type: object
 description: >-
-  The chunking strategy used to chunk the file(s). If not set, will use
-  the `auto` strategy.
+  (Optional) Strategy for splitting files into chunks
 metadata:
 type: object
 additionalProperties:
@@ -9196,21 +9204,12 @@ components:
 - type: array
 - type: object
 description: >-
-  Set of 16 key-value pairs that can be attached to an object.
-embedding_model:
-type: string
-description: >-
-  The embedding model to use for this vector store.
-embedding_dimension:
-type: integer
-description: >-
-  The dimension of the embedding vectors (default: 384).
-provider_id:
-type: string
-description: >-
-  The ID of the provider to use for this vector store.
+  Set of key-value pairs that can be attached to the vector store
 additionalProperties: false
-title: OpenaiCreateVectorStoreRequest
+title: >-
+  OpenAICreateVectorStoreRequestWithExtraBody
+description: >-
+  Request to create a vector store with extra_body support.
 OpenaiUpdateVectorStoreRequest:
 type: object
 properties:
@@ -9331,7 +9330,7 @@ components:
 title: VectorStoreChunkingStrategyStaticConfig
 description: >-
   Configuration for static chunking strategy.
-OpenaiCreateVectorStoreFileBatchRequest:
+"OpenAICreateVectorStoreFileBatchRequestWithExtraBody":
 type: object
 properties:
 file_ids:
@@ -9339,7 +9338,7 @@ components:
 items:
 type: string
 description: >-
-  A list of File IDs that the vector store should use.
+  A list of File IDs that the vector store should use
 attributes:
 type: object
 additionalProperties:
@@ -9351,16 +9350,19 @@ components:
 - type: array
 - type: object
 description: >-
-  (Optional) Key-value attributes to store with the files.
+  (Optional) Key-value attributes to store with the files
 chunking_strategy:
 $ref: '#/components/schemas/VectorStoreChunkingStrategy'
 description: >-
   (Optional) The chunking strategy used to chunk the file(s). Defaults to
-  auto.
+  auto
 additionalProperties: false
 required:
 - file_ids
-title: OpenaiCreateVectorStoreFileBatchRequest
+title: >-
+  OpenAICreateVectorStoreFileBatchRequestWithExtraBody
+description: >-
+  Request to create a vector store file batch with extra_body support.
 VectorStoreFileBatchObject:
 type: object
 properties:


@@ -765,7 +765,7 @@
 "content": {
 "application/json": {
 "schema": {
-"$ref": "#/components/schemas/OpenaiEmbeddingsRequest"
+"$ref": "#/components/schemas/OpenAIEmbeddingsRequestWithExtraBody"
 }
 }
 },
@@ -3170,13 +3170,13 @@
 "VectorIO"
 ],
 "summary": "Creates a vector store.",
-"description": "Creates a vector store.",
+"description": "Creates a vector store.\nGenerate an OpenAI-compatible vector store with the given parameters.",
 "parameters": [],
 "requestBody": {
 "content": {
 "application/json": {
 "schema": {
-"$ref": "#/components/schemas/OpenaiCreateVectorStoreRequest"
+"$ref": "#/components/schemas/OpenAICreateVectorStoreRequestWithExtraBody"
 }
 }
 },
@@ -3356,7 +3356,7 @@
 "VectorIO"
 ],
 "summary": "Create a vector store file batch.",
-"description": "Create a vector store file batch.",
+"description": "Create a vector store file batch.\nGenerate an OpenAI-compatible vector store file batch for the given vector store.",
 "parameters": [
 {
 "name": "vector_store_id",
@@ -3372,7 +3372,7 @@
 "content": {
 "application/json": {
 "schema": {
-"$ref": "#/components/schemas/OpenaiCreateVectorStoreFileBatchRequest"
+"$ref": "#/components/schemas/OpenAICreateVectorStoreFileBatchRequestWithExtraBody"
 }
 }
 },
@@ -6324,7 +6324,7 @@
 "title": "ConversationItemDeletedResource",
 "description": "Response for deleted conversation item."
 },
-"OpenaiEmbeddingsRequest": {
+"OpenAIEmbeddingsRequestWithExtraBody": {
 "type": "object",
 "properties": {
 "model": {
@@ -6347,6 +6347,7 @@
 },
 "encoding_format": {
 "type": "string",
+"default": "float",
 "description": "(Optional) The format to return the embeddings in. Can be either \"float\" or \"base64\". Defaults to \"float\"."
 },
 "dimensions": {
@@ -6363,7 +6364,8 @@
 "model",
 "input"
 ],
-"title": "OpenaiEmbeddingsRequest"
+"title": "OpenAIEmbeddingsRequestWithExtraBody",
+"description": "Request parameters for OpenAI-compatible embeddings endpoint."
 },
 "OpenAIEmbeddingData": {
 "type": "object",
@@ -12587,19 +12589,19 @@
 "title": "VectorStoreObject",
 "description": "OpenAI Vector Store object."
 },
-"OpenaiCreateVectorStoreRequest": {
+"OpenAICreateVectorStoreRequestWithExtraBody": {
 "type": "object",
 "properties": {
 "name": {
 "type": "string",
-"description": "A name for the vector store."
+"description": "(Optional) A name for the vector store"
 },
 "file_ids": {
 "type": "array",
 "items": {
 "type": "string"
 },
-"description": "A list of File IDs that the vector store should use. Useful for tools like `file_search` that can access files."
+"description": "List of file IDs to include in the vector store"
 },
 "expires_after": {
 "type": "object",
@@ -12625,7 +12627,7 @@
 }
 ]
 },
-"description": "The expiration policy for a vector store."
+"description": "(Optional) Expiration policy for the vector store"
 },
 "chunking_strategy": {
 "type": "object",
@@ -12651,7 +12653,7 @@
 }
 ]
 },
-"description": "The chunking strategy used to chunk the file(s). If not set, will use the `auto` strategy."
+"description": "(Optional) Strategy for splitting files into chunks"
 },
 "metadata": {
 "type": "object",
@@ -12677,23 +12679,12 @@
 }
 ]
 },
-"description": "Set of 16 key-value pairs that can be attached to an object."
-},
-"embedding_model": {
-"type": "string",
-"description": "The embedding model to use for this vector store."
-},
-"embedding_dimension": {
-"type": "integer",
-"description": "The dimension of the embedding vectors (default: 384)."
-},
-"provider_id": {
-"type": "string",
-"description": "The ID of the provider to use for this vector store."
+"description": "Set of key-value pairs that can be attached to the vector store"
 }
 },
 "additionalProperties": false,
-"title": "OpenaiCreateVectorStoreRequest"
+"title": "OpenAICreateVectorStoreRequestWithExtraBody",
+"description": "Request to create a vector store with extra_body support."
 },
 "OpenaiUpdateVectorStoreRequest": {
 "type": "object",
@@ -12863,7 +12854,7 @@
 "title": "VectorStoreChunkingStrategyStaticConfig",
 "description": "Configuration for static chunking strategy."
 },
-"OpenaiCreateVectorStoreFileBatchRequest": {
+"OpenAICreateVectorStoreFileBatchRequestWithExtraBody": {
 "type": "object",
 "properties": {
 "file_ids": {
@@ -12871,7 +12862,7 @@
 "items": {
 "type": "string"
 },
-"description": "A list of File IDs that the vector store should use."
+"description": "A list of File IDs that the vector store should use"
 },
 "attributes": {
 "type": "object",
@@ -12897,18 +12888,19 @@
 }
 ]
 },
-"description": "(Optional) Key-value attributes to store with the files."
+"description": "(Optional) Key-value attributes to store with the files"
 },
 "chunking_strategy": {
 "$ref": "#/components/schemas/VectorStoreChunkingStrategy",
-"description": "(Optional) The chunking strategy used to chunk the file(s). Defaults to auto."
+"description": "(Optional) The chunking strategy used to chunk the file(s). Defaults to auto"
 }
 },
 "additionalProperties": false,
 "required": [
 "file_ids"
 ],
-"title": "OpenaiCreateVectorStoreFileBatchRequest"
+"title": "OpenAICreateVectorStoreFileBatchRequestWithExtraBody",
+"description": "Request to create a vector store file batch with extra_body support."
 },
 "VectorStoreFileBatchObject": {
 "type": "object",


@@ -617,7 +617,7 @@ paths:
 content:
 application/json:
 schema:
-$ref: '#/components/schemas/OpenaiEmbeddingsRequest'
+$ref: '#/components/schemas/OpenAIEmbeddingsRequestWithExtraBody'
 required: true
 deprecated: false
 /v1/files:
@@ -2413,13 +2413,16 @@ paths:
 tags:
 - VectorIO
 summary: Creates a vector store.
-description: Creates a vector store.
+description: >-
+  Creates a vector store.
+
+  Generate an OpenAI-compatible vector store with the given parameters.
 parameters: []
 requestBody:
 content:
 application/json:
 schema:
-$ref: '#/components/schemas/OpenaiCreateVectorStoreRequest'
+$ref: '#/components/schemas/OpenAICreateVectorStoreRequestWithExtraBody'
 required: true
 deprecated: false
 /v1/vector_stores/{vector_store_id}:
@@ -2545,7 +2548,11 @@ paths:
 tags:
 - VectorIO
 summary: Create a vector store file batch.
-description: Create a vector store file batch.
+description: >-
+  Create a vector store file batch.
+
+  Generate an OpenAI-compatible vector store file batch for the given vector
+  store.
 parameters:
 - name: vector_store_id
 in: path
@@ -2558,7 +2565,7 @@ paths:
 content:
 application/json:
 schema:
-$ref: '#/components/schemas/OpenaiCreateVectorStoreFileBatchRequest'
+$ref: '#/components/schemas/OpenAICreateVectorStoreFileBatchRequestWithExtraBody'
 required: true
 deprecated: false
 /v1/vector_stores/{vector_store_id}/file_batches/{batch_id}:
@@ -4797,7 +4804,7 @@ components:
 - deleted
 title: ConversationItemDeletedResource
 description: Response for deleted conversation item.
-OpenaiEmbeddingsRequest:
+OpenAIEmbeddingsRequestWithExtraBody:
 type: object
 properties:
 model:
@@ -4816,6 +4823,7 @@ components:
   multiple inputs in a single request, pass an array of strings.
 encoding_format:
 type: string
+default: float
 description: >-
   (Optional) The format to return the embeddings in. Can be either "float"
   or "base64". Defaults to "float".
@@ -4833,7 +4841,9 @@ components:
 required:
 - model
 - input
-title: OpenaiEmbeddingsRequest
+title: OpenAIEmbeddingsRequestWithExtraBody
+description: >-
+  Request parameters for OpenAI-compatible embeddings endpoint.
 OpenAIEmbeddingData:
 type: object
 properties:
@@ -9612,19 +9622,18 @@ components:
 - metadata
 title: VectorStoreObject
 description: OpenAI Vector Store object.
-OpenaiCreateVectorStoreRequest:
+"OpenAICreateVectorStoreRequestWithExtraBody":
 type: object
 properties:
 name:
 type: string
-description: A name for the vector store.
+description: (Optional) A name for the vector store
 file_ids:
 type: array
 items:
 type: string
 description: >-
-  A list of File IDs that the vector store should use. Useful for tools
-  like `file_search` that can access files.
+  List of file IDs to include in the vector store
 expires_after:
 type: object
 additionalProperties:
@@ -9636,7 +9645,7 @@ components:
 - type: array
 - type: object
 description: >-
-  The expiration policy for a vector store.
+  (Optional) Expiration policy for the vector store
 chunking_strategy:
 type: object
 additionalProperties:
@@ -9648,8 +9657,7 @@ components:
 - type: array
 - type: object
 description: >-
-  The chunking strategy used to chunk the file(s). If not set, will use
-  the `auto` strategy.
+  (Optional) Strategy for splitting files into chunks
 metadata:
 type: object
 additionalProperties:
@@ -9661,21 +9669,12 @@ components:
 - type: array
 - type: object
 description: >-
-  Set of 16 key-value pairs that can be attached to an object.
-embedding_model:
-type: string
-description: >-
-  The embedding model to use for this vector store.
-embedding_dimension:
-type: integer
-description: >-
-  The dimension of the embedding vectors (default: 384).
-provider_id:
-type: string
-description: >-
-  The ID of the provider to use for this vector store.
+  Set of key-value pairs that can be attached to the vector store
 additionalProperties: false
-title: OpenaiCreateVectorStoreRequest
+title: >-
+  OpenAICreateVectorStoreRequestWithExtraBody
+description: >-
+  Request to create a vector store with extra_body support.
 OpenaiUpdateVectorStoreRequest:
 type: object
 properties:
@@ -9796,7 +9795,7 @@ components:
 title: VectorStoreChunkingStrategyStaticConfig
 description: >-
   Configuration for static chunking strategy.
-OpenaiCreateVectorStoreFileBatchRequest:
+"OpenAICreateVectorStoreFileBatchRequestWithExtraBody":
 type: object
 properties:
 file_ids:
@@ -9804,7 +9803,7 @@ components:
 items:
 type: string
 description: >-
-  A list of File IDs that the vector store should use.
+  A list of File IDs that the vector store should use
 attributes:
 type: object
 additionalProperties:
@@ -9816,16 +9815,19 @@ components:
 - type: array
 - type: object
 description: >-
-  (Optional) Key-value attributes to store with the files.
+  (Optional) Key-value attributes to store with the files
 chunking_strategy:
 $ref: '#/components/schemas/VectorStoreChunkingStrategy'
 description: >-
   (Optional) The chunking strategy used to chunk the file(s). Defaults to
-  auto.
+  auto
 additionalProperties: false
 required:
 - file_ids
-title: OpenaiCreateVectorStoreFileBatchRequest
+title: >-
  OpenAICreateVectorStoreFileBatchRequestWithExtraBody
+description: >-
+  Request to create a vector store file batch with extra_body support.
 VectorStoreFileBatchObject:
 type: object
 properties:


@@ -765,7 +765,7 @@
 "content": {
 "application/json": {
 "schema": {
-"$ref": "#/components/schemas/OpenaiEmbeddingsRequest"
+"$ref": "#/components/schemas/OpenAIEmbeddingsRequestWithExtraBody"
 }
 }
 },
@@ -3170,13 +3170,13 @@
 "VectorIO"
 ],
 "summary": "Creates a vector store.",
-"description": "Creates a vector store.",
+"description": "Creates a vector store.\nGenerate an OpenAI-compatible vector store with the given parameters.",
 "parameters": [],
 "requestBody": {
 "content": {
 "application/json": {
 "schema": {
-"$ref": "#/components/schemas/OpenaiCreateVectorStoreRequest"
+"$ref": "#/components/schemas/OpenAICreateVectorStoreRequestWithExtraBody"
 }
 }
 },
@@ -3356,7 +3356,7 @@
 "VectorIO"
 ],
 "summary": "Create a vector store file batch.",
-"description": "Create a vector store file batch.",
+"description": "Create a vector store file batch.\nGenerate an OpenAI-compatible vector store file batch for the given vector store.",
 "parameters": [
 {
 "name": "vector_store_id",
@@ -3372,7 +3372,7 @@
 "content": {
 "application/json": {
 "schema": {
-"$ref": "#/components/schemas/OpenaiCreateVectorStoreFileBatchRequest"
+"$ref": "#/components/schemas/OpenAICreateVectorStoreFileBatchRequestWithExtraBody"
 }
 }
 },
@@ -8333,7 +8333,7 @@
 "title": "ConversationItemDeletedResource",
 "description": "Response for deleted conversation item."
 },
-"OpenaiEmbeddingsRequest": {
+"OpenAIEmbeddingsRequestWithExtraBody": {
 "type": "object",
 "properties": {
 "model": {
@@ -8356,6 +8356,7 @@
 },
 "encoding_format": {
 "type": "string",
+"default": "float",
 "description": "(Optional) The format to return the embeddings in. Can be either \"float\" or \"base64\". Defaults to \"float\"."
 },
 "dimensions": {
@@ -8372,7 +8373,8 @@
 "model",
 "input"
 ],
-"title": "OpenaiEmbeddingsRequest"
+"title": "OpenAIEmbeddingsRequestWithExtraBody",
+"description": "Request parameters for OpenAI-compatible embeddings endpoint."
 },
 "OpenAIEmbeddingData": {
 "type": "object",
@@ -14596,19 +14598,19 @@
 "title": "VectorStoreObject",
 "description": "OpenAI Vector Store object."
 },
-"OpenaiCreateVectorStoreRequest": {
+"OpenAICreateVectorStoreRequestWithExtraBody": {
 "type": "object",
 "properties": {
 "name": {
 "type": "string",
-"description": "A name for the vector store."
+"description": "(Optional) A name for the vector store"
 },
 "file_ids": {
 "type": "array",
 "items": {
 "type": "string"
 },
-"description": "A list of File IDs that the vector store should use. Useful for tools like `file_search` that can access files."
+"description": "List of file IDs to include in the vector store"
 },
 "expires_after": {
 "type": "object",
@@ -14634,7 +14636,7 @@
 }
 ]
 },
-"description": "The expiration policy for a vector store."
+"description": "(Optional) Expiration policy for the vector store"
 },
 "chunking_strategy": {
 "type": "object",
@@ -14660,7 +14662,7 @@
 }
 ]
 },
-"description": "The chunking strategy used to chunk the file(s). If not set, will use the `auto` strategy."
+"description": "(Optional) Strategy for splitting files into chunks"
 },
 "metadata": {
 "type": "object",
@@ -14686,23 +14688,12 @@
 }
 ]
 },
-"description": "Set of 16 key-value pairs that can be attached to an object."
-},
-"embedding_model": {
-"type": "string",
-"description": "The embedding model to use for this vector store."
-},
-"embedding_dimension": {
-"type": "integer",
-"description": "The dimension of the embedding vectors (default: 384)."
-},
-"provider_id": {
-"type": "string",
-"description": "The ID of the provider to use for this vector store."
+"description": "Set of key-value pairs that can be attached to the vector store"
 }
 },
 "additionalProperties": false,
-"title": "OpenaiCreateVectorStoreRequest"
+"title": "OpenAICreateVectorStoreRequestWithExtraBody",
+"description": "Request to create a vector store with extra_body support."
 },
 "OpenaiUpdateVectorStoreRequest": {
 "type": "object",
@@ -14872,7 +14863,7 @@
 "title": "VectorStoreChunkingStrategyStaticConfig",
 "description": "Configuration for static chunking strategy."
 },
-"OpenaiCreateVectorStoreFileBatchRequest": {
+"OpenAICreateVectorStoreFileBatchRequestWithExtraBody": {
 "type": "object",
 "properties": {
 "file_ids": {
@@ -14880,7 +14871,7 @@
 "items": {
 "type": "string"
 },
-"description": "A list of File IDs that the vector store should use."
+"description": "A list of File IDs that the vector store should use"
 },
 "attributes": {
 "type": "object",
@@ -14906,18 +14897,19 @@
 }
 ]
 },
-"description": "(Optional) Key-value attributes to store with the files."
+"description": "(Optional) Key-value attributes to store with the files"
 },
 "chunking_strategy": {
 "$ref": "#/components/schemas/VectorStoreChunkingStrategy",
-"description": "(Optional) The chunking strategy used to chunk the file(s). Defaults to auto."
+"description": "(Optional) The chunking strategy used to chunk the file(s). Defaults to auto"
 }
 },
 "additionalProperties": false,
 "required": [
 "file_ids"
 ],
-"title": "OpenaiCreateVectorStoreFileBatchRequest"
+"title": "OpenAICreateVectorStoreFileBatchRequestWithExtraBody",
+"description": "Request to create a vector store file batch with extra_body support."
 },
 "VectorStoreFileBatchObject": {
 "type": "object",


@@ -620,7 +620,7 @@ paths:
 content:
 application/json:
 schema:
-$ref: '#/components/schemas/OpenaiEmbeddingsRequest'
+$ref: '#/components/schemas/OpenAIEmbeddingsRequestWithExtraBody'
 required: true
 deprecated: false
 /v1/files:
@@ -2416,13 +2416,16 @@ paths:
 tags:
 - VectorIO
 summary: Creates a vector store.
-description: Creates a vector store.
+description: >-
+  Creates a vector store.
+
+  Generate an OpenAI-compatible vector store with the given parameters.
 parameters: []
 requestBody:
 content:
 application/json:
 schema:
-$ref: '#/components/schemas/OpenaiCreateVectorStoreRequest'
+$ref: '#/components/schemas/OpenAICreateVectorStoreRequestWithExtraBody'
 required: true
 deprecated: false
 /v1/vector_stores/{vector_store_id}:
@@ -2548,7 +2551,11 @@ paths:
 tags:
 - VectorIO
 summary: Create a vector store file batch.
-description: Create a vector store file batch.
+description: >-
+  Create a vector store file batch.
+
+  Generate an OpenAI-compatible vector store file batch for the given vector
+  store.
 parameters:
 - name: vector_store_id
 in: path
@@ -2561,7 +2568,7 @@ paths:
 content:
 application/json:
 schema:
-$ref: '#/components/schemas/OpenaiCreateVectorStoreFileBatchRequest'
+$ref: '#/components/schemas/OpenAICreateVectorStoreFileBatchRequestWithExtraBody'
 required: true
 deprecated: false
 /v1/vector_stores/{vector_store_id}/file_batches/{batch_id}:
@@ -6242,7 +6249,7 @@ components:
 - deleted
 title: ConversationItemDeletedResource
 description: Response for deleted conversation item.
-OpenaiEmbeddingsRequest:
+OpenAIEmbeddingsRequestWithExtraBody:
 type: object
 properties:
 model:
@@ -6261,6 +6268,7 @@ components:
   multiple inputs in a single request, pass an array of strings.
 encoding_format:
 type: string
+default: float
 description: >-
   (Optional) The format to return the embeddings in. Can be either "float"
   or "base64". Defaults to "float".
@@ -6278,7 +6286,9 @@ components:
 required:
 - model
 - input
-title: OpenaiEmbeddingsRequest
+title: OpenAIEmbeddingsRequestWithExtraBody
+description: >-
  Request parameters for OpenAI-compatible embeddings endpoint.
 OpenAIEmbeddingData:
 type: object
 properties:
@@ -11057,19 +11067,18 @@ components:
 - metadata
 title: VectorStoreObject
 description: OpenAI Vector Store object.
-OpenaiCreateVectorStoreRequest:
+"OpenAICreateVectorStoreRequestWithExtraBody":
 type: object
 properties:
 name:
 type: string
-description: A name for the vector store.
+description: (Optional) A name for the vector store
 file_ids:
 type: array
 items:
 type: string
 description: >-
-  A list of File IDs that the vector store should use. Useful for tools
-  like `file_search` that can access files.
+  List of file IDs to include in the vector store
 expires_after:
 type: object
 additionalProperties:
@@ -11081,7 +11090,7 @@ components:
 - type: array
 - type: object
 description: >-
-  The expiration policy for a vector store.
+  (Optional) Expiration policy for the vector store
 chunking_strategy:
 type: object
 additionalProperties:
@@ -11093,8 +11102,7 @@ components:
 - type: array
 - type: object
 description: >-
-  The chunking strategy used to chunk the file(s). If not set, will use
-  the `auto` strategy.
+  (Optional) Strategy for splitting files into chunks
 metadata:
 type: object
 additionalProperties:
@@ -11106,21 +11114,12 @@ components:
 - type: array
 - type: object
 description: >-
-  Set of 16 key-value pairs that can be attached to an object.
-embedding_model:
-type: string
-description: >-
-  The embedding model to use for this vector store.
-embedding_dimension:
-type: integer
-description: >-
-  The dimension of the embedding vectors (default: 384).
-provider_id:
-type: string
-description: >-
-  The ID of the provider to use for this vector store.
+  Set of key-value pairs that can be attached to the vector store
 additionalProperties: false
-title: OpenaiCreateVectorStoreRequest
+title: >-
+  OpenAICreateVectorStoreRequestWithExtraBody
+description: >-
+  Request to create a vector store with extra_body support.
 OpenaiUpdateVectorStoreRequest:
 type: object
 properties:
@@ -11241,7 +11240,7 @@ components:
 title: VectorStoreChunkingStrategyStaticConfig
 description: >-
   Configuration for static chunking strategy.
-OpenaiCreateVectorStoreFileBatchRequest:
+"OpenAICreateVectorStoreFileBatchRequestWithExtraBody":
 type: object
 properties:
 file_ids:
@@ -11249,7 +11248,7 @@ components:
 items:
 type: string
 description: >-
-  A list of File IDs that the vector store should use.
+  A list of File IDs that the vector store should use
 attributes:
 type: object
 additionalProperties:
@@ -11261,16 +11260,19 @@ components:
 - type: array
 - type: object
 description: >-
-  (Optional) Key-value attributes to store with the files.
+  (Optional) Key-value attributes to store with the files
 chunking_strategy:
 $ref: '#/components/schemas/VectorStoreChunkingStrategy'
 description: >-
   (Optional) The chunking strategy used to chunk the file(s). Defaults to
-  auto.
+  auto
 additionalProperties: false
 required:
 - file_ids
-title: OpenaiCreateVectorStoreFileBatchRequest
+title: >-
+  OpenAICreateVectorStoreFileBatchRequestWithExtraBody
+description: >-
+  Request to create a vector store file batch with extra_body support.
 VectorStoreFileBatchObject:
 type: object
 properties:


@@ -1140,6 +1140,25 @@ class OpenAIChatCompletionRequestWithExtraBody(BaseModel, extra="allow"):
     user: str | None = None
 
 
+# extra_body can be accessed via .model_extra
+@json_schema_type
+class OpenAIEmbeddingsRequestWithExtraBody(BaseModel, extra="allow"):
+    """Request parameters for OpenAI-compatible embeddings endpoint.
+
+    :param model: The identifier of the model to use. The model must be an embedding model registered with Llama Stack and available via the /models endpoint.
+    :param input: Input text to embed, encoded as a string or array of strings. To embed multiple inputs in a single request, pass an array of strings.
+    :param encoding_format: (Optional) The format to return the embeddings in. Can be either "float" or "base64". Defaults to "float".
+    :param dimensions: (Optional) The number of dimensions the resulting output embeddings should have. Only supported in text-embedding-3 and later models.
+    :param user: (Optional) A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse.
+    """
+
+    model: str
+    input: str | list[str]
+    encoding_format: str | None = "float"
+    dimensions: int | None = None
+    user: str | None = None
+
+
 @runtime_checkable
 @trace_protocol
 class InferenceProvider(Protocol):
@@ -1200,21 +1219,11 @@ class InferenceProvider(Protocol):
     @webmethod(route="/embeddings", method="POST", level=LLAMA_STACK_API_V1)
     async def openai_embeddings(
         self,
-        model: str,
-        input: str | list[str],
-        encoding_format: str | None = "float",
-        dimensions: int | None = None,
-        user: str | None = None,
+        params: Annotated[OpenAIEmbeddingsRequestWithExtraBody, Body(...)],
     ) -> OpenAIEmbeddingsResponse:
         """Create embeddings.
 
         Generate OpenAI-compatible embeddings for the given input using the specified model.
 
-        :param model: The identifier of the model to use. The model must be an embedding model registered with Llama Stack and available via the /models endpoint.
-        :param input: Input text to embed, encoded as a string or array of strings. To embed multiple inputs in a single request, pass an array of strings.
-        :param encoding_format: (Optional) The format to return the embeddings in. Can be either "float" or "base64". Defaults to "float".
-        :param dimensions: (Optional) The number of dimensions the resulting output embeddings should have. Only supported in text-embedding-3 and later models.
-        :param user: (Optional) A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse.
         :returns: An OpenAIEmbeddingsResponse containing the embeddings.
         """
         ...


@@ -11,6 +11,7 @@
 import uuid
 from typing import Annotated, Any, Literal, Protocol, runtime_checkable
 
+from fastapi import Body
 from pydantic import BaseModel, Field
 
 from llama_stack.apis.inference import InterleavedContent
@@ -466,6 +467,40 @@ class VectorStoreFilesListInBatchResponse(BaseModel):
     has_more: bool = False
 
 
+# extra_body can be accessed via .model_extra
+@json_schema_type
+class OpenAICreateVectorStoreRequestWithExtraBody(BaseModel, extra="allow"):
+    """Request to create a vector store with extra_body support.
+
+    :param name: (Optional) A name for the vector store
+    :param file_ids: List of file IDs to include in the vector store
+    :param expires_after: (Optional) Expiration policy for the vector store
+    :param chunking_strategy: (Optional) Strategy for splitting files into chunks
+    :param metadata: Set of key-value pairs that can be attached to the vector store
+    """
+
+    name: str | None = None
+    file_ids: list[str] | None = None
+    expires_after: dict[str, Any] | None = None
+    chunking_strategy: dict[str, Any] | None = None
+    metadata: dict[str, Any] | None = None
+
+
+# extra_body can be accessed via .model_extra
+@json_schema_type
+class OpenAICreateVectorStoreFileBatchRequestWithExtraBody(BaseModel, extra="allow"):
+    """Request to create a vector store file batch with extra_body support.
+
+    :param file_ids: A list of File IDs that the vector store should use
+    :param attributes: (Optional) Key-value attributes to store with the files
+    :param chunking_strategy: (Optional) The chunking strategy used to chunk the file(s). Defaults to auto
+    """
+
+    file_ids: list[str]
+    attributes: dict[str, Any] | None = None
+    chunking_strategy: VectorStoreChunkingStrategy | None = None
+
+
 class VectorDBStore(Protocol):
     def get_vector_db(self, vector_db_id: str) -> VectorDB | None: ...
@@ -516,25 +551,11 @@ class VectorIO(Protocol):
     @webmethod(route="/vector_stores", method="POST", level=LLAMA_STACK_API_V1)
     async def openai_create_vector_store(
         self,
-        name: str | None = None,
-        file_ids: list[str] | None = None,
-        expires_after: dict[str, Any] | None = None,
-        chunking_strategy: dict[str, Any] | None = None,
-        metadata: dict[str, Any] | None = None,
-        embedding_model: str | None = None,
-        embedding_dimension: int | None = 384,
-        provider_id: str | None = None,
+        params: Annotated[OpenAICreateVectorStoreRequestWithExtraBody, Body(...)],
     ) -> VectorStoreObject:
         """Creates a vector store.
 
-        :param name: A name for the vector store.
-        :param file_ids: A list of File IDs that the vector store should use. Useful for tools like `file_search` that can access files.
-        :param expires_after: The expiration policy for a vector store.
-        :param chunking_strategy: The chunking strategy used to chunk the file(s). If not set, will use the `auto` strategy.
-        :param metadata: Set of 16 key-value pairs that can be attached to an object.
-        :param embedding_model: The embedding model to use for this vector store.
-        :param embedding_dimension: The dimension of the embedding vectors (default: 384).
-        :param provider_id: The ID of the provider to use for this vector store.
+        Generate an OpenAI-compatible vector store with the given parameters.
+
         :returns: A VectorStoreObject representing the created vector store.
         """
         ...
@@ -827,16 +848,12 @@ class VectorIO(Protocol):
     async def openai_create_vector_store_file_batch(
         self,
         vector_store_id: str,
-        file_ids: list[str],
-        attributes: dict[str, Any] | None = None,
-        chunking_strategy: VectorStoreChunkingStrategy | None = None,
+        params: Annotated[OpenAICreateVectorStoreFileBatchRequestWithExtraBody, Body(...)],
     ) -> VectorStoreFileBatchObject:
         """Create a vector store file batch.
 
+        Generate an OpenAI-compatible vector store file batch for the given vector store.
+
         :param vector_store_id: The ID of the vector store to create the file batch for.
-        :param file_ids: A list of File IDs that the vector store should use.
-        :param attributes: (Optional) Key-value attributes to store with the files.
-        :param chunking_strategy: (Optional) The chunking strategy used to chunk the file(s). Defaults to auto.
         :returns: A VectorStoreFileBatchObject representing the created file batch.
         """
         ...


@@ -513,6 +513,14 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient):
         # Strip NOT_GIVENs to use the defaults in signature
         body = {k: v for k, v in body.items() if v is not NOT_GIVEN}
 
+        # Check if there's an unwrapped body parameter among multiple parameters
+        # (e.g., path param + body param like: vector_store_id: str, params: Annotated[Model, Body(...)])
+        unwrapped_body_param = None
+        for param in params_list:
+            if is_unwrapped_body_param(param.annotation):
+                unwrapped_body_param = param
+                break
+
         # Convert parameters to Pydantic models where needed
         converted_body = {}
         for param_name, param in sig.parameters.items():
@@ -522,5 +530,11 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient):
                     converted_body[param_name] = value
                 else:
                     converted_body[param_name] = convert_to_pydantic(param.annotation, value)
+            elif unwrapped_body_param and param.name == unwrapped_body_param.name:
+                # This is the unwrapped body param - construct it from remaining body keys
+                base_type = get_args(param.annotation)[0]
+                # Extract only the keys that aren't already used by other params
+                remaining_keys = {k: v for k, v in body.items() if k not in converted_body}
+                converted_body[param.name] = base_type(**remaining_keys)
 
         return converted_body


@@ -40,6 +40,7 @@ from llama_stack.apis.inference import (
     OpenAICompletion,
     OpenAICompletionRequestWithExtraBody,
     OpenAICompletionWithInputMessages,
+    OpenAIEmbeddingsRequestWithExtraBody,
     OpenAIEmbeddingsResponse,
     OpenAIMessageParam,
     Order,
@@ -279,26 +280,18 @@ class InferenceRouter(Inference):
     async def openai_embeddings(
         self,
-        model: str,
-        input: str | list[str],
-        encoding_format: str | None = "float",
-        dimensions: int | None = None,
-        user: str | None = None,
+        params: Annotated[OpenAIEmbeddingsRequestWithExtraBody, Body(...)],
     ) -> OpenAIEmbeddingsResponse:
         logger.debug(
-            f"InferenceRouter.openai_embeddings: {model=}, input_type={type(input)}, {encoding_format=}, {dimensions=}",
+            f"InferenceRouter.openai_embeddings: model={params.model}, input_type={type(params.input)}, encoding_format={params.encoding_format}, dimensions={params.dimensions}",
         )
-        model_obj = await self._get_model(model, ModelType.embedding)
-        params = dict(
-            model=model_obj.identifier,
-            input=input,
-            encoding_format=encoding_format,
-            dimensions=dimensions,
-            user=user,
-        )
+        model_obj = await self._get_model(params.model, ModelType.embedding)
+
+        # Update model to use resolved identifier
+        params.model = model_obj.identifier
+
         provider = await self.routing_table.get_provider_impl(model_obj.identifier)
-        return await provider.openai_embeddings(**params)
+        return await provider.openai_embeddings(params)
 
     async def list_chat_completions(
         self,


@@ -6,12 +6,16 @@

 import asyncio
 import uuid
-from typing import Any
+from typing import Annotated, Any
+
+from fastapi import Body

 from llama_stack.apis.common.content_types import InterleavedContent
 from llama_stack.apis.models import ModelType
 from llama_stack.apis.vector_io import (
     Chunk,
+    OpenAICreateVectorStoreFileBatchRequestWithExtraBody,
+    OpenAICreateVectorStoreRequestWithExtraBody,
     QueryChunksResponse,
     SearchRankingOptions,
     VectorIO,

@@ -120,18 +124,19 @@ class VectorIORouter(VectorIO):

     # OpenAI Vector Stores API endpoints
     async def openai_create_vector_store(
         self,
-        name: str,
-        file_ids: list[str] | None = None,
-        expires_after: dict[str, Any] | None = None,
-        chunking_strategy: dict[str, Any] | None = None,
-        metadata: dict[str, Any] | None = None,
-        embedding_model: str | None = None,
-        embedding_dimension: int | None = None,
-        provider_id: str | None = None,
+        params: Annotated[OpenAICreateVectorStoreRequestWithExtraBody, Body(...)],
     ) -> VectorStoreObject:
-        logger.debug(f"VectorIORouter.openai_create_vector_store: name={name}, provider_id={provider_id}")
+        # Extract llama-stack-specific parameters from extra_body
+        extra = params.model_extra or {}
+        embedding_model = extra.get("embedding_model")
+        embedding_dimension = extra.get("embedding_dimension", 384)
+        provider_id = extra.get("provider_id")
+        logger.debug(f"VectorIORouter.openai_create_vector_store: name={params.name}, provider_id={provider_id}")

         # If no embedding model is provided, use the first available one
+        # TODO: this branch will soon be deleted so you _must_ provide the embedding_model when
+        # creating a vector store
         if embedding_model is None:
             embedding_model_info = await self._get_first_embedding_model()
             if embedding_model_info is None:

@@ -146,20 +151,19 @@ class VectorIORouter(VectorIO):
             embedding_dimension=embedding_dimension,
             provider_id=provider_id,
             provider_vector_db_id=vector_db_id,
-            vector_db_name=name,
+            vector_db_name=params.name,
         )
         provider = await self.routing_table.get_provider_impl(registered_vector_db.identifier)
-        return await provider.openai_create_vector_store(
-            name=name,
-            file_ids=file_ids,
-            expires_after=expires_after,
-            chunking_strategy=chunking_strategy,
-            metadata=metadata,
-            embedding_model=embedding_model,
-            embedding_dimension=embedding_dimension,
-            provider_id=registered_vector_db.provider_id,
-            provider_vector_db_id=registered_vector_db.provider_resource_id,
-        )
+
+        # Update model_extra with registered values so provider uses the already-registered vector_db
+        if params.model_extra is None:
+            params.model_extra = {}
+        params.model_extra["provider_vector_db_id"] = registered_vector_db.provider_resource_id
+        params.model_extra["provider_id"] = registered_vector_db.provider_id
+        params.model_extra["embedding_model"] = embedding_model
+        params.model_extra["embedding_dimension"] = embedding_dimension
+
+        return await provider.openai_create_vector_store(params)

     async def openai_list_vector_stores(
         self,

@@ -219,7 +223,8 @@ class VectorIORouter(VectorIO):
         vector_store_id: str,
     ) -> VectorStoreObject:
         logger.debug(f"VectorIORouter.openai_retrieve_vector_store: {vector_store_id}")
-        return await self.routing_table.openai_retrieve_vector_store(vector_store_id)
+        provider = await self.routing_table.get_provider_impl(vector_store_id)
+        return await provider.openai_retrieve_vector_store(vector_store_id)

     async def openai_update_vector_store(
         self,

@@ -229,7 +234,8 @@ class VectorIORouter(VectorIO):
         metadata: dict[str, Any] | None = None,
     ) -> VectorStoreObject:
         logger.debug(f"VectorIORouter.openai_update_vector_store: {vector_store_id}")
-        return await self.routing_table.openai_update_vector_store(
+        provider = await self.routing_table.get_provider_impl(vector_store_id)
+        return await provider.openai_update_vector_store(
             vector_store_id=vector_store_id,
             name=name,
             expires_after=expires_after,

@@ -241,7 +247,8 @@ class VectorIORouter(VectorIO):
         vector_store_id: str,
     ) -> VectorStoreDeleteResponse:
         logger.debug(f"VectorIORouter.openai_delete_vector_store: {vector_store_id}")
-        return await self.routing_table.openai_delete_vector_store(vector_store_id)
+        provider = await self.routing_table.get_provider_impl(vector_store_id)
+        return await provider.openai_delete_vector_store(vector_store_id)

     async def openai_search_vector_store(
         self,

@@ -254,7 +261,8 @@ class VectorIORouter(VectorIO):
         search_mode: str | None = "vector",
     ) -> VectorStoreSearchResponsePage:
         logger.debug(f"VectorIORouter.openai_search_vector_store: {vector_store_id}")
-        return await self.routing_table.openai_search_vector_store(
+        provider = await self.routing_table.get_provider_impl(vector_store_id)
+        return await provider.openai_search_vector_store(
             vector_store_id=vector_store_id,
             query=query,
             filters=filters,

@@ -272,7 +280,8 @@ class VectorIORouter(VectorIO):
         chunking_strategy: VectorStoreChunkingStrategy | None = None,
     ) -> VectorStoreFileObject:
         logger.debug(f"VectorIORouter.openai_attach_file_to_vector_store: {vector_store_id}, {file_id}")
-        return await self.routing_table.openai_attach_file_to_vector_store(
+        provider = await self.routing_table.get_provider_impl(vector_store_id)
+        return await provider.openai_attach_file_to_vector_store(
             vector_store_id=vector_store_id,
             file_id=file_id,
             attributes=attributes,

@@ -289,7 +298,8 @@ class VectorIORouter(VectorIO):
         filter: VectorStoreFileStatus | None = None,
     ) -> list[VectorStoreFileObject]:
         logger.debug(f"VectorIORouter.openai_list_files_in_vector_store: {vector_store_id}")
-        return await self.routing_table.openai_list_files_in_vector_store(
+        provider = await self.routing_table.get_provider_impl(vector_store_id)
+        return await provider.openai_list_files_in_vector_store(
             vector_store_id=vector_store_id,
             limit=limit,
             order=order,

@@ -304,7 +314,8 @@ class VectorIORouter(VectorIO):
         file_id: str,
     ) -> VectorStoreFileObject:
         logger.debug(f"VectorIORouter.openai_retrieve_vector_store_file: {vector_store_id}, {file_id}")
-        return await self.routing_table.openai_retrieve_vector_store_file(
+        provider = await self.routing_table.get_provider_impl(vector_store_id)
+        return await provider.openai_retrieve_vector_store_file(
             vector_store_id=vector_store_id,
             file_id=file_id,
         )

@@ -315,7 +326,8 @@ class VectorIORouter(VectorIO):
         file_id: str,
     ) -> VectorStoreFileContentsResponse:
         logger.debug(f"VectorIORouter.openai_retrieve_vector_store_file_contents: {vector_store_id}, {file_id}")
-        return await self.routing_table.openai_retrieve_vector_store_file_contents(
+        provider = await self.routing_table.get_provider_impl(vector_store_id)
+        return await provider.openai_retrieve_vector_store_file_contents(
             vector_store_id=vector_store_id,
             file_id=file_id,
         )

@@ -327,7 +339,8 @@ class VectorIORouter(VectorIO):
         attributes: dict[str, Any],
     ) -> VectorStoreFileObject:
         logger.debug(f"VectorIORouter.openai_update_vector_store_file: {vector_store_id}, {file_id}")
-        return await self.routing_table.openai_update_vector_store_file(
+        provider = await self.routing_table.get_provider_impl(vector_store_id)
+        return await provider.openai_update_vector_store_file(
             vector_store_id=vector_store_id,
             file_id=file_id,
             attributes=attributes,

@@ -339,7 +352,8 @@ class VectorIORouter(VectorIO):
         file_id: str,
     ) -> VectorStoreFileDeleteResponse:
         logger.debug(f"VectorIORouter.openai_delete_vector_store_file: {vector_store_id}, {file_id}")
-        return await self.routing_table.openai_delete_vector_store_file(
+        provider = await self.routing_table.get_provider_impl(vector_store_id)
+        return await provider.openai_delete_vector_store_file(
             vector_store_id=vector_store_id,
             file_id=file_id,
         )

@@ -370,17 +384,13 @@ class VectorIORouter(VectorIO):
     async def openai_create_vector_store_file_batch(
         self,
         vector_store_id: str,
-        file_ids: list[str],
-        attributes: dict[str, Any] | None = None,
-        chunking_strategy: VectorStoreChunkingStrategy | None = None,
+        params: Annotated[OpenAICreateVectorStoreFileBatchRequestWithExtraBody, Body(...)],
     ) -> VectorStoreFileBatchObject:
-        logger.debug(f"VectorIORouter.openai_create_vector_store_file_batch: {vector_store_id}, {len(file_ids)} files")
-        return await self.routing_table.openai_create_vector_store_file_batch(
-            vector_store_id=vector_store_id,
-            file_ids=file_ids,
-            attributes=attributes,
-            chunking_strategy=chunking_strategy,
-        )
+        logger.debug(
+            f"VectorIORouter.openai_create_vector_store_file_batch: {vector_store_id}, {len(params.file_ids)} files"
+        )
+        provider = await self.routing_table.get_provider_impl(vector_store_id)
+        return await provider.openai_create_vector_store_file_batch(vector_store_id, params)

     async def openai_retrieve_vector_store_file_batch(
         self,

@@ -388,7 +398,8 @@ class VectorIORouter(VectorIO):
         vector_store_id: str,
     ) -> VectorStoreFileBatchObject:
         logger.debug(f"VectorIORouter.openai_retrieve_vector_store_file_batch: {batch_id}, {vector_store_id}")
-        return await self.routing_table.openai_retrieve_vector_store_file_batch(
+        provider = await self.routing_table.get_provider_impl(vector_store_id)
+        return await provider.openai_retrieve_vector_store_file_batch(
             batch_id=batch_id,
             vector_store_id=vector_store_id,
         )

@@ -404,7 +415,8 @@ class VectorIORouter(VectorIO):
         order: str | None = "desc",
     ) -> VectorStoreFilesListInBatchResponse:
         logger.debug(f"VectorIORouter.openai_list_files_in_vector_store_file_batch: {batch_id}, {vector_store_id}")
-        return await self.routing_table.openai_list_files_in_vector_store_file_batch(
+        provider = await self.routing_table.get_provider_impl(vector_store_id)
+        return await provider.openai_list_files_in_vector_store_file_batch(
             batch_id=batch_id,
             vector_store_id=vector_store_id,
             after=after,

@@ -420,7 +432,8 @@ class VectorIORouter(VectorIO):
         vector_store_id: str,
     ) -> VectorStoreFileBatchObject:
         logger.debug(f"VectorIORouter.openai_cancel_vector_store_file_batch: {batch_id}, {vector_store_id}")
-        return await self.routing_table.openai_cancel_vector_store_file_batch(
+        provider = await self.routing_table.get_provider_impl(vector_store_id)
+        return await provider.openai_cancel_vector_store_file_batch(
             batch_id=batch_id,
             vector_store_id=vector_store_id,
         )
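A hedged sketch of the resulting client call: llama-stack-specific settings travel in `extra_body` on an otherwise standard OpenAI vector store create. The ids and model below are illustrative, and `client.vector_stores` assumes a recent openai package where the API is no longer under `beta`:

from openai import OpenAI

client = OpenAI(base_url="http://localhost:8321/v1", api_key="none")  # hypothetical local endpoint
vs = client.vector_stores.create(
    name="docs",
    # Not part of the OpenAI schema; these arrive via model_extra on
    # OpenAICreateVectorStoreRequestWithExtraBody and are routed as shown above.
    extra_body={
        "embedding_model": "all-MiniLM-L6-v2",  # illustrative
        "embedding_dimension": 384,
        "provider_id": "faiss",  # illustrative
    },
)
print(vs.id)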

View file

@@ -25,6 +25,7 @@ from llama_stack.apis.inference import (
     OpenAIChatCompletionRequestWithExtraBody,
     OpenAICompletionRequestWithExtraBody,
     OpenAIDeveloperMessageParam,
+    OpenAIEmbeddingsRequestWithExtraBody,
     OpenAIMessageParam,
     OpenAISystemMessageParam,
     OpenAIToolMessageParam,

@@ -640,7 +641,9 @@ class ReferenceBatchesImpl(Batches):
                     },
                 }
             else:  # /v1/embeddings
-                embeddings_response = await self.inference_api.openai_embeddings(**request.body)
+                embeddings_response = await self.inference_api.openai_embeddings(
+                    OpenAIEmbeddingsRequestWithExtraBody(**request.body)
+                )
                 assert hasattr(embeddings_response, "model_dump_json"), (
                     "Embeddings response must have model_dump_json method"
                 )
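The `*WithExtraBody` request models rely on Pydantic's `extra="allow"` config, so unknown keys survive validation and are exposed through `model_extra`. A reduced sketch of the pattern (field set abbreviated; the real models live in llama_stack.apis):

from pydantic import BaseModel, ConfigDict

class EmbeddingsRequestSketch(BaseModel):
    # abbreviated stand-in for OpenAIEmbeddingsRequestWithExtraBody
    model_config = ConfigDict(extra="allow")
    model: str
    input: str | list[str]

req = EmbeddingsRequestSketch(model="m", input="hi", input_type="query")
print(req.model_extra)  # {'input_type': 'query'}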

View file

@@ -14,6 +14,7 @@ from llama_stack.apis.inference import (
     Inference,
     OpenAIChatCompletionRequestWithExtraBody,
     OpenAICompletionRequestWithExtraBody,
+    OpenAIEmbeddingsRequestWithExtraBody,
     OpenAIEmbeddingsResponse,
 )
 from llama_stack.apis.inference.inference import (

@@ -124,11 +125,7 @@ class BedrockInferenceAdapter(
     async def openai_embeddings(
         self,
-        model: str,
-        input: str | list[str],
-        encoding_format: str | None = "float",
-        dimensions: int | None = None,
-        user: str | None = None,
+        params: OpenAIEmbeddingsRequestWithExtraBody,
     ) -> OpenAIEmbeddingsResponse:
         raise NotImplementedError()

View file

@@ -6,7 +6,10 @@

 from urllib.parse import urljoin

-from llama_stack.apis.inference import OpenAIEmbeddingsResponse
+from llama_stack.apis.inference import (
+    OpenAIEmbeddingsRequestWithExtraBody,
+    OpenAIEmbeddingsResponse,
+)
 from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin

 from .config import CerebrasImplConfig

@@ -20,10 +23,6 @@ class CerebrasInferenceAdapter(OpenAIMixin):

     async def openai_embeddings(
         self,
-        model: str,
-        input: str | list[str],
-        encoding_format: str | None = "float",
-        dimensions: int | None = None,
-        user: str | None = None,
+        params: OpenAIEmbeddingsRequestWithExtraBody,
     ) -> OpenAIEmbeddingsResponse:
         raise NotImplementedError()

View file

@@ -7,6 +7,7 @@
 from llama_stack.apis.inference.inference import (
     OpenAICompletion,
     OpenAICompletionRequestWithExtraBody,
+    OpenAIEmbeddingsRequestWithExtraBody,
     OpenAIEmbeddingsResponse,
 )
 from llama_stack.log import get_logger

@@ -40,10 +41,6 @@ class LlamaCompatInferenceAdapter(OpenAIMixin):

     async def openai_embeddings(
         self,
-        model: str,
-        input: str | list[str],
-        encoding_format: str | None = "float",
-        dimensions: int | None = None,
-        user: str | None = None,
+        params: OpenAIEmbeddingsRequestWithExtraBody,
     ) -> OpenAIEmbeddingsResponse:
         raise NotImplementedError()

View file

@@ -9,6 +9,7 @@ from openai import NOT_GIVEN

 from llama_stack.apis.inference import (
     OpenAIEmbeddingData,
+    OpenAIEmbeddingsRequestWithExtraBody,
     OpenAIEmbeddingsResponse,
     OpenAIEmbeddingUsage,
 )

@@ -78,11 +79,7 @@ class NVIDIAInferenceAdapter(OpenAIMixin):

     async def openai_embeddings(
         self,
-        model: str,
-        input: str | list[str],
-        encoding_format: str | None = "float",
-        dimensions: int | None = None,
-        user: str | None = None,
+        params: OpenAIEmbeddingsRequestWithExtraBody,
     ) -> OpenAIEmbeddingsResponse:
         """
         OpenAI-compatible embeddings for NVIDIA NIM.

@@ -99,11 +96,11 @@ class NVIDIAInferenceAdapter(OpenAIMixin):
         )

         response = await self.client.embeddings.create(
-            model=await self._get_provider_model_id(model),
-            input=input,
-            encoding_format=encoding_format if encoding_format is not None else NOT_GIVEN,
-            dimensions=dimensions if dimensions is not None else NOT_GIVEN,
-            user=user if user is not None else NOT_GIVEN,
+            model=await self._get_provider_model_id(params.model),
+            input=params.input,
+            encoding_format=params.encoding_format if params.encoding_format is not None else NOT_GIVEN,
+            dimensions=params.dimensions if params.dimensions is not None else NOT_GIVEN,
+            user=params.user if params.user is not None else NOT_GIVEN,
             extra_body=extra_body,
         )

View file

@@ -16,6 +16,7 @@ from llama_stack.apis.inference import (
     OpenAIChatCompletionRequestWithExtraBody,
     OpenAICompletion,
     OpenAICompletionRequestWithExtraBody,
+    OpenAIEmbeddingsRequestWithExtraBody,
     OpenAIEmbeddingsResponse,
 )
 from llama_stack.apis.models import Model

@@ -69,11 +70,7 @@ class PassthroughInferenceAdapter(Inference):

     async def openai_embeddings(
         self,
-        model: str,
-        input: str | list[str],
-        encoding_format: str | None = "float",
-        dimensions: int | None = None,
-        user: str | None = None,
+        params: OpenAIEmbeddingsRequestWithExtraBody,
     ) -> OpenAIEmbeddingsResponse:
         raise NotImplementedError()

View file

@@ -10,7 +10,10 @@ from collections.abc import Iterable

 from huggingface_hub import AsyncInferenceClient, HfApi
 from pydantic import SecretStr

-from llama_stack.apis.inference import OpenAIEmbeddingsResponse
+from llama_stack.apis.inference import (
+    OpenAIEmbeddingsRequestWithExtraBody,
+    OpenAIEmbeddingsResponse,
+)
 from llama_stack.log import get_logger
 from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin

@@ -40,11 +43,7 @@ class _HfAdapter(OpenAIMixin):

     async def openai_embeddings(
         self,
-        model: str,
-        input: str | list[str],
-        encoding_format: str | None = "float",
-        dimensions: int | None = None,
-        user: str | None = None,
+        params: OpenAIEmbeddingsRequestWithExtraBody,
     ) -> OpenAIEmbeddingsResponse:
         raise NotImplementedError()

View file

@@ -11,6 +11,7 @@ from together import AsyncTogether
 from together.constants import BASE_URL

 from llama_stack.apis.inference import (
+    OpenAIEmbeddingsRequestWithExtraBody,
     OpenAIEmbeddingsResponse,
 )
 from llama_stack.apis.inference.inference import OpenAIEmbeddingUsage

@@ -62,11 +63,7 @@ class TogetherInferenceAdapter(OpenAIMixin, NeedsRequestProviderData):

     async def openai_embeddings(
         self,
-        model: str,
-        input: str | list[str],
-        encoding_format: str | None = "float",
-        dimensions: int | None = None,
-        user: str | None = None,
+        params: OpenAIEmbeddingsRequestWithExtraBody,
     ) -> OpenAIEmbeddingsResponse:
         """
         Together's OpenAI-compatible embeddings endpoint is not compatible with

@@ -78,25 +75,27 @@ class TogetherInferenceAdapter(OpenAIMixin, NeedsRequestProviderData):
         - does not support dimensions param, returns 400 Unrecognized request arguments supplied: dimensions
         """
         # Together support ticket #13332 -> will not fix
-        if user is not None:
+        if params.user is not None:
             raise ValueError("Together's embeddings endpoint does not support user param.")

         # Together support ticket #13333 -> escalated
-        if dimensions is not None:
+        if params.dimensions is not None:
             raise ValueError("Together's embeddings endpoint does not support dimensions param.")

         response = await self.client.embeddings.create(
-            model=await self._get_provider_model_id(model),
-            input=input,
-            encoding_format=encoding_format,
+            model=await self._get_provider_model_id(params.model),
+            input=params.input,
+            encoding_format=params.encoding_format,
         )

-        response.model = model  # return the user the same model id they provided, avoid exposing the provider model id
+        response.model = (
+            params.model
+        )  # return the user the same model id they provided, avoid exposing the provider model id

         # Together support ticket #13330 -> escalated
         # - togethercomputer/m2-bert-80M-32k-retrieval *does not* return usage information
         if not hasattr(response, "usage") or response.usage is None:
             logger.warning(
-                f"Together's embedding endpoint for {model} did not return usage information, substituting -1s."
+                f"Together's embedding endpoint for {params.model} did not return usage information, substituting -1s."
             )
             response.usage = OpenAIEmbeddingUsage(prompt_tokens=-1, total_tokens=-1)

View file

@@ -17,6 +17,7 @@ if TYPE_CHECKING:
 from llama_stack.apis.inference import (
     ModelStore,
     OpenAIEmbeddingData,
+    OpenAIEmbeddingsRequestWithExtraBody,
     OpenAIEmbeddingsResponse,
     OpenAIEmbeddingUsage,
 )

@@ -32,26 +33,22 @@ class SentenceTransformerEmbeddingMixin:

     async def openai_embeddings(
         self,
-        model: str,
-        input: str | list[str],
-        encoding_format: str | None = "float",
-        dimensions: int | None = None,
-        user: str | None = None,
+        params: OpenAIEmbeddingsRequestWithExtraBody,
     ) -> OpenAIEmbeddingsResponse:
         # Convert input to list format if it's a single string
-        input_list = [input] if isinstance(input, str) else input
+        input_list = [params.input] if isinstance(params.input, str) else params.input
         if not input_list:
             raise ValueError("Empty list not supported")

         # Get the model and generate embeddings
-        model_obj = await self.model_store.get_model(model)
+        model_obj = await self.model_store.get_model(params.model)
         embedding_model = await self._load_sentence_transformer_model(model_obj.provider_resource_id)
         embeddings = await asyncio.to_thread(embedding_model.encode, input_list, show_progress_bar=False)

         # Convert embeddings to the requested format
         data = []
         for i, embedding in enumerate(embeddings):
-            if encoding_format == "base64":
+            if params.encoding_format == "base64":
                 # Convert float array to base64 string
                 float_bytes = struct.pack(f"{len(embedding)}f", *embedding)
                 embedding_value = base64.b64encode(float_bytes).decode("ascii")

@@ -70,7 +67,7 @@ class SentenceTransformerEmbeddingMixin:
         usage = OpenAIEmbeddingUsage(prompt_tokens=-1, total_tokens=-1)
         return OpenAIEmbeddingsResponse(
             data=data,
-            model=model,
+            model=params.model,
             usage=usage,
         )
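For reference, decoding the base64 `encoding_format` produced above is the mirror of the struct-pack step; a small self-contained sketch:

import base64
import struct

def decode_base64_embedding(value: str) -> list[float]:
    # mirrors struct.pack(f"{n}f", ...) + b64encode on the server side
    raw = base64.b64decode(value)
    return list(struct.unpack(f"{len(raw) // 4}f", raw))

encoded = base64.b64encode(struct.pack("3f", 0.1, 0.2, 0.3)).decode("ascii")
print(decode_base64_embedding(encoded))  # approximately [0.1, 0.2, 0.3]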

View file

@@ -20,6 +20,7 @@ from llama_stack.apis.inference import (
     OpenAICompletion,
     OpenAICompletionRequestWithExtraBody,
     OpenAIEmbeddingData,
+    OpenAIEmbeddingsRequestWithExtraBody,
     OpenAIEmbeddingsResponse,
     OpenAIEmbeddingUsage,
     ToolChoice,

@@ -189,16 +190,12 @@ class LiteLLMOpenAIMixin(

     async def openai_embeddings(
         self,
-        model: str,
-        input: str | list[str],
-        encoding_format: str | None = "float",
-        dimensions: int | None = None,
-        user: str | None = None,
+        params: OpenAIEmbeddingsRequestWithExtraBody,
     ) -> OpenAIEmbeddingsResponse:
-        model_obj = await self.model_store.get_model(model)
+        model_obj = await self.model_store.get_model(params.model)

         # Convert input to list if it's a string
-        input_list = [input] if isinstance(input, str) else input
+        input_list = [params.input] if isinstance(params.input, str) else params.input

         # Call litellm embedding function
         # litellm.drop_params = True

@@ -207,11 +204,11 @@ class LiteLLMOpenAIMixin(
             input=input_list,
             api_key=self.get_api_key(),
             api_base=self.api_base,
-            dimensions=dimensions,
+            dimensions=params.dimensions,
         )

         # Convert response to OpenAI format
-        data = b64_encode_openai_embeddings_response(response.data, encoding_format)
+        data = b64_encode_openai_embeddings_response(response.data, params.encoding_format)

         usage = OpenAIEmbeddingUsage(
             prompt_tokens=response["usage"]["prompt_tokens"],

View file

@@ -21,6 +21,7 @@ from llama_stack.apis.inference import (
     OpenAICompletion,
     OpenAICompletionRequestWithExtraBody,
     OpenAIEmbeddingData,
+    OpenAIEmbeddingsRequestWithExtraBody,
     OpenAIEmbeddingsResponse,
     OpenAIEmbeddingUsage,
     OpenAIMessageParam,

@@ -316,23 +317,27 @@ class OpenAIMixin(NeedsRequestProviderData, ABC, BaseModel):

     async def openai_embeddings(
         self,
-        model: str,
-        input: str | list[str],
-        encoding_format: str | None = "float",
-        dimensions: int | None = None,
-        user: str | None = None,
+        params: OpenAIEmbeddingsRequestWithExtraBody,
     ) -> OpenAIEmbeddingsResponse:
         """
         Direct OpenAI embeddings API call.
         """
+        # Prepare request parameters
+        request_params = {
+            "model": await self._get_provider_model_id(params.model),
+            "input": params.input,
+            "encoding_format": params.encoding_format if params.encoding_format is not None else NOT_GIVEN,
+            "dimensions": params.dimensions if params.dimensions is not None else NOT_GIVEN,
+            "user": params.user if params.user is not None else NOT_GIVEN,
+        }
+
+        # Add extra_body if present
+        extra_body = params.model_extra
+        if extra_body:
+            request_params["extra_body"] = extra_body
+
         # Call OpenAI embeddings API with properly typed parameters
-        response = await self.client.embeddings.create(
-            model=await self._get_provider_model_id(model),
-            input=input,
-            encoding_format=encoding_format if encoding_format is not None else NOT_GIVEN,
-            dimensions=dimensions if dimensions is not None else NOT_GIVEN,
-            user=user if user is not None else NOT_GIVEN,
-        )
+        response = await self.client.embeddings.create(**request_params)

         data = []
         for i, embedding_data in enumerate(response.data):

@@ -350,7 +355,7 @@ class OpenAIMixin(NeedsRequestProviderData, ABC, BaseModel):
         return OpenAIEmbeddingsResponse(
             data=data,
-            model=model,
+            model=params.model,
             usage=usage,
         )
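Why the `NOT_GIVEN` dance above: the openai client distinguishes "omit this field" from "send an explicit null", so optional params are forwarded only when actually set. A reduced sketch of the same pattern:

from openai import NOT_GIVEN

def optional_kwargs(dimensions: int | None, user: str | None) -> dict:
    # values left as NOT_GIVEN are dropped from the JSON body by the client,
    # whereas None would serialize as an explicit null
    return {
        "dimensions": dimensions if dimensions is not None else NOT_GIVEN,
        "user": user if user is not None else NOT_GIVEN,
    }

print(optional_kwargs(None, "u1"))  # {'dimensions': NOT_GIVEN, 'user': 'u1'}; the client omits dimensions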

View file

@@ -10,8 +10,9 @@ import mimetypes
 import time
 import uuid
 from abc import ABC, abstractmethod
-from typing import Any
+from typing import Annotated, Any

+from fastapi import Body
 from pydantic import TypeAdapter

 from llama_stack.apis.common.errors import VectorStoreNotFoundError

@@ -19,6 +20,8 @@ from llama_stack.apis.files import Files, OpenAIFileObject
 from llama_stack.apis.vector_dbs import VectorDB
 from llama_stack.apis.vector_io import (
     Chunk,
+    OpenAICreateVectorStoreFileBatchRequestWithExtraBody,
+    OpenAICreateVectorStoreRequestWithExtraBody,
     QueryChunksResponse,
     SearchRankingOptions,
     VectorStoreChunkingStrategy,

@@ -340,18 +343,18 @@ class OpenAIVectorStoreMixin(ABC):

     async def openai_create_vector_store(
         self,
-        name: str | None = None,
-        file_ids: list[str] | None = None,
-        expires_after: dict[str, Any] | None = None,
-        chunking_strategy: dict[str, Any] | None = None,
-        metadata: dict[str, Any] | None = None,
-        embedding_model: str | None = None,
-        embedding_dimension: int | None = 384,
-        provider_id: str | None = None,
-        provider_vector_db_id: str | None = None,
+        params: Annotated[OpenAICreateVectorStoreRequestWithExtraBody, Body(...)],
     ) -> VectorStoreObject:
         """Creates a vector store."""
         created_at = int(time.time())
+
+        # Extract llama-stack-specific parameters from extra_body
+        extra = params.model_extra or {}
+        provider_vector_db_id = extra.get("provider_vector_db_id")
+        embedding_model = extra.get("embedding_model")
+        embedding_dimension = extra.get("embedding_dimension", 384)
+        provider_id = extra.get("provider_id")

         # Derive the canonical vector_db_id (allow override, else generate)
         vector_db_id = provider_vector_db_id or generate_object_id("vector_store", lambda: f"vs_{uuid.uuid4()}")

@@ -372,7 +375,7 @@ class OpenAIVectorStoreMixin(ABC):
             embedding_model=embedding_model,
             provider_id=provider_id,
             provider_resource_id=vector_db_id,
-            vector_db_name=name,
+            vector_db_name=params.name,
         )
         await self.register_vector_db(vector_db)

@@ -391,19 +394,19 @@ class OpenAIVectorStoreMixin(ABC):
             "id": vector_db_id,
             "object": "vector_store",
             "created_at": created_at,
-            "name": name,
+            "name": params.name,
             "usage_bytes": 0,
             "file_counts": file_counts.model_dump(),
             "status": status,
-            "expires_after": expires_after,
+            "expires_after": params.expires_after,
             "expires_at": None,
             "last_active_at": created_at,
             "file_ids": [],
-            "chunking_strategy": chunking_strategy,
+            "chunking_strategy": params.chunking_strategy,
         }

         # Add provider information to metadata if provided
-        metadata = metadata or {}
+        metadata = params.metadata or {}
         if provider_id:
             metadata["provider_id"] = provider_id
         if provider_vector_db_id:

@@ -417,7 +420,7 @@ class OpenAIVectorStoreMixin(ABC):
         self.openai_vector_stores[vector_db_id] = store_info

         # Now that our vector store is created, attach any files that were provided
-        file_ids = file_ids or []
+        file_ids = params.file_ids or []
         tasks = [self.openai_attach_file_to_vector_store(vector_db_id, file_id) for file_id in file_ids]
         await asyncio.gather(*tasks)

@@ -976,15 +979,13 @@ class OpenAIVectorStoreMixin(ABC):
     async def openai_create_vector_store_file_batch(
         self,
         vector_store_id: str,
-        file_ids: list[str],
-        attributes: dict[str, Any] | None = None,
-        chunking_strategy: VectorStoreChunkingStrategy | None = None,
+        params: Annotated[OpenAICreateVectorStoreFileBatchRequestWithExtraBody, Body(...)],
     ) -> VectorStoreFileBatchObject:
         """Create a vector store file batch."""
         if vector_store_id not in self.openai_vector_stores:
             raise VectorStoreNotFoundError(vector_store_id)

-        chunking_strategy = chunking_strategy or VectorStoreChunkingStrategyAuto()
+        chunking_strategy = params.chunking_strategy or VectorStoreChunkingStrategyAuto()

         created_at = int(time.time())
         batch_id = generate_object_id("vector_store_file_batch", lambda: f"batch_{uuid.uuid4()}")

@@ -996,8 +997,8 @@ class OpenAIVectorStoreMixin(ABC):
             completed=0,
             cancelled=0,
             failed=0,
-            in_progress=len(file_ids),
-            total=len(file_ids),
+            in_progress=len(params.file_ids),
+            total=len(params.file_ids),
         )

         # Create batch object immediately with in_progress status

@@ -1011,8 +1012,8 @@ class OpenAIVectorStoreMixin(ABC):
         batch_info = {
             **batch_object.model_dump(),
-            "file_ids": file_ids,
-            "attributes": attributes,
+            "file_ids": params.file_ids,
+            "attributes": params.attributes,
             "chunking_strategy": chunking_strategy.model_dump(),
             "expires_at": expires_at,
         }
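Callers of the mixin now construct the request object explicitly, matching the updated tests further below; a short sketch (store and ids are illustrative):

from llama_stack.apis.vector_io import OpenAICreateVectorStoreFileBatchRequestWithExtraBody

params = OpenAICreateVectorStoreFileBatchRequestWithExtraBody(
    file_ids=["file_1", "file_2"],  # illustrative file ids
)
# inside an async context, against some mixin implementation `store`:
# batch = await store.openai_create_vector_store_file_batch("vs_123", params)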

View file

@@ -21,6 +21,7 @@ from llama_stack.apis.common.content_types import (
     URL,
     InterleavedContent,
 )
+from llama_stack.apis.inference import OpenAIEmbeddingsRequestWithExtraBody
 from llama_stack.apis.tools import RAGDocument
 from llama_stack.apis.vector_dbs import VectorDB
 from llama_stack.apis.vector_io import Chunk, ChunkMetadata, QueryChunksResponse

@@ -274,10 +275,11 @@ class VectorDBWithIndex:
             _validate_embedding(c.embedding, i, self.vector_db.embedding_dimension)

         if chunks_to_embed:
-            resp = await self.inference_api.openai_embeddings(
-                self.vector_db.embedding_model,
-                [c.content for c in chunks_to_embed],
-            )
+            params = OpenAIEmbeddingsRequestWithExtraBody(
+                model=self.vector_db.embedding_model,
+                input=[c.content for c in chunks_to_embed],
+            )
+            resp = await self.inference_api.openai_embeddings(params)
             for c, data in zip(chunks_to_embed, resp.data, strict=False):
                 c.embedding = data.embedding

@@ -316,7 +318,11 @@ class VectorDBWithIndex:
         if mode == "keyword":
             return await self.index.query_keyword(query_string, k, score_threshold)

-        embeddings_response = await self.inference_api.openai_embeddings(self.vector_db.embedding_model, [query_string])
+        params = OpenAIEmbeddingsRequestWithExtraBody(
+            model=self.vector_db.embedding_model,
+            input=[query_string],
+        )
+        embeddings_response = await self.inference_api.openai_embeddings(params)
         query_vector = np.array(embeddings_response.data[0].embedding, dtype=np.float32)
         if mode == "hybrid":
             return await self.index.query_hybrid(

View file

@@ -15,6 +15,7 @@ from llama_stack.apis.common.errors import VectorStoreNotFoundError
 from llama_stack.apis.vector_dbs import VectorDB
 from llama_stack.apis.vector_io import (
     Chunk,
+    OpenAICreateVectorStoreFileBatchRequestWithExtraBody,
     QueryChunksResponse,
     VectorStoreChunkingStrategyAuto,
     VectorStoreFileObject,

@@ -326,8 +327,7 @@ async def test_create_vector_store_file_batch(vector_io_adapter):
     vector_io_adapter._process_file_batch_async = AsyncMock()

     batch = await vector_io_adapter.openai_create_vector_store_file_batch(
-        vector_store_id=store_id,
-        file_ids=file_ids,
+        vector_store_id=store_id, params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(file_ids=file_ids)
     )

     assert batch.vector_store_id == store_id

@@ -354,8 +354,7 @@ async def test_retrieve_vector_store_file_batch(vector_io_adapter):
     # Create batch first
     created_batch = await vector_io_adapter.openai_create_vector_store_file_batch(
-        vector_store_id=store_id,
-        file_ids=file_ids,
+        vector_store_id=store_id, params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(file_ids=file_ids)
     )

     # Retrieve batch

@@ -388,8 +387,7 @@ async def test_cancel_vector_store_file_batch(vector_io_adapter):
     # Create batch
     batch = await vector_io_adapter.openai_create_vector_store_file_batch(
-        vector_store_id=store_id,
-        file_ids=file_ids,
+        vector_store_id=store_id, params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(file_ids=file_ids)
     )

     # Cancel batch

@@ -434,8 +432,7 @@ async def test_list_files_in_vector_store_file_batch(vector_io_adapter):
     # Create batch
     batch = await vector_io_adapter.openai_create_vector_store_file_batch(
-        vector_store_id=store_id,
-        file_ids=file_ids,
+        vector_store_id=store_id, params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(file_ids=file_ids)
     )

     # List files

@@ -455,7 +452,7 @@ async def test_file_batch_validation_errors(vector_io_adapter):
     with pytest.raises(VectorStoreNotFoundError):
         await vector_io_adapter.openai_create_vector_store_file_batch(
             vector_store_id="nonexistent",
-            file_ids=["file_1"],
+            params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(file_ids=["file_1"]),
         )

     # Setup store for remaining tests

@@ -472,8 +469,7 @@ async def test_file_batch_validation_errors(vector_io_adapter):
     # Test wrong vector store for batch
     vector_io_adapter.openai_attach_file_to_vector_store = AsyncMock()
     batch = await vector_io_adapter.openai_create_vector_store_file_batch(
-        vector_store_id=store_id,
-        file_ids=["file_1"],
+        vector_store_id=store_id, params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(file_ids=["file_1"])
     )

     # Create wrong_store so it exists but the batch doesn't belong to it

@@ -520,8 +516,7 @@ async def test_file_batch_pagination(vector_io_adapter):
     # Create batch
     batch = await vector_io_adapter.openai_create_vector_store_file_batch(
-        vector_store_id=store_id,
-        file_ids=file_ids,
+        vector_store_id=store_id, params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(file_ids=file_ids)
     )

     # Test pagination with limit

@@ -593,8 +588,7 @@ async def test_file_batch_status_filtering(vector_io_adapter):
     # Create batch
     batch = await vector_io_adapter.openai_create_vector_store_file_batch(
-        vector_store_id=store_id,
-        file_ids=file_ids,
+        vector_store_id=store_id, params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(file_ids=file_ids)
     )

     # Test filtering by completed status

@@ -636,8 +630,7 @@ async def test_cancel_completed_batch_fails(vector_io_adapter):
     # Create batch
     batch = await vector_io_adapter.openai_create_vector_store_file_batch(
-        vector_store_id=store_id,
-        file_ids=file_ids,
+        vector_store_id=store_id, params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(file_ids=file_ids)
     )

     # Manually update status to completed

@@ -671,8 +664,7 @@ async def test_file_batch_persistence_across_restarts(vector_io_adapter):
     # Create batch
     batch = await vector_io_adapter.openai_create_vector_store_file_batch(
-        vector_store_id=store_id,
-        file_ids=file_ids,
+        vector_store_id=store_id, params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(file_ids=file_ids)
     )
     batch_id = batch.id

@@ -727,8 +719,7 @@ async def test_cancelled_batch_persists_in_storage(vector_io_adapter):
     # Create batch
     batch = await vector_io_adapter.openai_create_vector_store_file_batch(
-        vector_store_id=store_id,
-        file_ids=file_ids,
+        vector_store_id=store_id, params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(file_ids=file_ids)
     )
     batch_id = batch.id

@@ -775,10 +766,10 @@ async def test_only_in_progress_batches_resumed(vector_io_adapter):
     # Create multiple batches
     batch1 = await vector_io_adapter.openai_create_vector_store_file_batch(
-        vector_store_id=store_id, file_ids=["file_1"]
+        vector_store_id=store_id, params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(file_ids=["file_1"])
     )
     batch2 = await vector_io_adapter.openai_create_vector_store_file_batch(
-        vector_store_id=store_id, file_ids=["file_2"]
+        vector_store_id=store_id, params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(file_ids=["file_2"])
     )

     # Complete one batch (should persist with completed status)

@@ -791,7 +782,7 @@ async def test_only_in_progress_batches_resumed(vector_io_adapter):
     # Create a third batch that stays in progress
     batch3 = await vector_io_adapter.openai_create_vector_store_file_batch(
-        vector_store_id=store_id, file_ids=["file_3"]
+        vector_store_id=store_id, params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(file_ids=["file_3"])
     )

     # Simulate restart - clear memory and reload from persistence

@@ -952,8 +943,7 @@ async def test_max_concurrent_files_per_batch(vector_io_adapter):
     file_ids = [f"file_{i}" for i in range(8)]  # 8 files, but limit should be 5

     batch = await vector_io_adapter.openai_create_vector_store_file_batch(
-        vector_store_id=store_id,
-        file_ids=file_ids,
+        vector_store_id=store_id, params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(file_ids=file_ids)
     )

     # Give time for the semaphore logic to start processing files

View file

@@ -13,7 +13,10 @@ from unittest.mock import AsyncMock, MagicMock
 import numpy as np
 import pytest

-from llama_stack.apis.inference.inference import OpenAIEmbeddingData
+from llama_stack.apis.inference.inference import (
+    OpenAIEmbeddingData,
+    OpenAIEmbeddingsRequestWithExtraBody,
+)
 from llama_stack.apis.tools import RAGDocument
 from llama_stack.apis.vector_io import Chunk
 from llama_stack.providers.utils.memory.vector_store import (

@@ -226,9 +229,14 @@ class TestVectorDBWithIndex:

         await vector_db_with_index.insert_chunks(chunks)

-        mock_inference_api.openai_embeddings.assert_called_once_with(
-            "test-model without embeddings", ["Test 1", "Test 2"]
-        )
+        # Verify openai_embeddings was called with correct params
+        mock_inference_api.openai_embeddings.assert_called_once()
+        call_args = mock_inference_api.openai_embeddings.call_args[0]
+        assert len(call_args) == 1
+        params = call_args[0]
+        assert isinstance(params, OpenAIEmbeddingsRequestWithExtraBody)
+        assert params.model == "test-model without embeddings"
+        assert params.input == ["Test 1", "Test 2"]
         mock_index.add_chunks.assert_called_once()
         args = mock_index.add_chunks.call_args[0]
         assert args[0] == chunks

@@ -321,9 +329,14 @@ class TestVectorDBWithIndex:

         await vector_db_with_index.insert_chunks(chunks)

-        mock_inference_api.openai_embeddings.assert_called_once_with(
-            "test-model with partial embeddings", ["Test 1", "Test 3"]
-        )
+        # Verify openai_embeddings was called with correct params
+        mock_inference_api.openai_embeddings.assert_called_once()
+        call_args = mock_inference_api.openai_embeddings.call_args[0]
+        assert len(call_args) == 1
+        params = call_args[0]
+        assert isinstance(params, OpenAIEmbeddingsRequestWithExtraBody)
+        assert params.model == "test-model with partial embeddings"
+        assert params.input == ["Test 1", "Test 3"]
         mock_index.add_chunks.assert_called_once()
         args = mock_index.add_chunks.call_args[0]
         assert len(args[0]) == 3