mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-10-14 06:23:45 +00:00
feat(api)!: support extra_body to embeddings and vector_stores APIs (#3794)
Some checks failed
Integration Auth Tests / test-matrix (oauth2_token) (push) Failing after 0s
Python Package Build Test / build (3.12) (push) Failing after 1s
Unit Tests / unit-tests (3.13) (push) Failing after 4s
SqlStore Integration Tests / test-postgres (3.12) (push) Failing after 0s
SqlStore Integration Tests / test-postgres (3.13) (push) Failing after 0s
Test External Providers Installed via Module / test-external-providers-from-module (venv) (push) Has been skipped
Python Package Build Test / build (3.13) (push) Failing after 1s
Integration Tests (Replay) / Integration Tests (, , , client=, ) (push) Failing after 3s
Vector IO Integration Tests / test-matrix (push) Failing after 5s
Test External API and Providers / test-external (venv) (push) Failing after 5s
Unit Tests / unit-tests (3.12) (push) Failing after 4s
API Conformance Tests / check-schema-compatibility (push) Successful in 10s
UI Tests / ui-tests (22) (push) Successful in 40s
Pre-commit / pre-commit (push) Successful in 1m23s
Some checks failed
Integration Auth Tests / test-matrix (oauth2_token) (push) Failing after 0s
Python Package Build Test / build (3.12) (push) Failing after 1s
Unit Tests / unit-tests (3.13) (push) Failing after 4s
SqlStore Integration Tests / test-postgres (3.12) (push) Failing after 0s
SqlStore Integration Tests / test-postgres (3.13) (push) Failing after 0s
Test External Providers Installed via Module / test-external-providers-from-module (venv) (push) Has been skipped
Python Package Build Test / build (3.13) (push) Failing after 1s
Integration Tests (Replay) / Integration Tests (, , , client=, ) (push) Failing after 3s
Vector IO Integration Tests / test-matrix (push) Failing after 5s
Test External API and Providers / test-external (venv) (push) Failing after 5s
Unit Tests / unit-tests (3.12) (push) Failing after 4s
API Conformance Tests / check-schema-compatibility (push) Successful in 10s
UI Tests / ui-tests (22) (push) Successful in 40s
Pre-commit / pre-commit (push) Successful in 1m23s
Applies the same pattern from https://github.com/llamastack/llama-stack/pull/3777 to embeddings and vector_stores.create() endpoints. This should _not_ be a breaking change since (a) our tests were already using the `extra_body` parameter when passing in to the backend (b) but the backend probably wasn't extracting the parameters correctly. This PR will fix that. Updated APIs: `openai_embeddings(), openai_create_vector_store(), openai_create_vector_store_file_batch()`
This commit is contained in:
parent
3bb6ef351b
commit
ecc8a554d2
26 changed files with 451 additions and 426 deletions
|
@ -1140,6 +1140,25 @@ class OpenAIChatCompletionRequestWithExtraBody(BaseModel, extra="allow"):
|
|||
user: str | None = None
|
||||
|
||||
|
||||
# extra_body can be accessed via .model_extra
|
||||
@json_schema_type
|
||||
class OpenAIEmbeddingsRequestWithExtraBody(BaseModel, extra="allow"):
|
||||
"""Request parameters for OpenAI-compatible embeddings endpoint.
|
||||
|
||||
:param model: The identifier of the model to use. The model must be an embedding model registered with Llama Stack and available via the /models endpoint.
|
||||
:param input: Input text to embed, encoded as a string or array of strings. To embed multiple inputs in a single request, pass an array of strings.
|
||||
:param encoding_format: (Optional) The format to return the embeddings in. Can be either "float" or "base64". Defaults to "float".
|
||||
:param dimensions: (Optional) The number of dimensions the resulting output embeddings should have. Only supported in text-embedding-3 and later models.
|
||||
:param user: (Optional) A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse.
|
||||
"""
|
||||
|
||||
model: str
|
||||
input: str | list[str]
|
||||
encoding_format: str | None = "float"
|
||||
dimensions: int | None = None
|
||||
user: str | None = None
|
||||
|
||||
|
||||
@runtime_checkable
|
||||
@trace_protocol
|
||||
class InferenceProvider(Protocol):
|
||||
|
@ -1200,21 +1219,11 @@ class InferenceProvider(Protocol):
|
|||
@webmethod(route="/embeddings", method="POST", level=LLAMA_STACK_API_V1)
|
||||
async def openai_embeddings(
|
||||
self,
|
||||
model: str,
|
||||
input: str | list[str],
|
||||
encoding_format: str | None = "float",
|
||||
dimensions: int | None = None,
|
||||
user: str | None = None,
|
||||
params: Annotated[OpenAIEmbeddingsRequestWithExtraBody, Body(...)],
|
||||
) -> OpenAIEmbeddingsResponse:
|
||||
"""Create embeddings.
|
||||
|
||||
Generate OpenAI-compatible embeddings for the given input using the specified model.
|
||||
|
||||
:param model: The identifier of the model to use. The model must be an embedding model registered with Llama Stack and available via the /models endpoint.
|
||||
:param input: Input text to embed, encoded as a string or array of strings. To embed multiple inputs in a single request, pass an array of strings.
|
||||
:param encoding_format: (Optional) The format to return the embeddings in. Can be either "float" or "base64". Defaults to "float".
|
||||
:param dimensions: (Optional) The number of dimensions the resulting output embeddings should have. Only supported in text-embedding-3 and later models.
|
||||
:param user: (Optional) A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse.
|
||||
:returns: An OpenAIEmbeddingsResponse containing the embeddings.
|
||||
"""
|
||||
...
|
||||
|
|
|
@ -11,6 +11,7 @@
|
|||
import uuid
|
||||
from typing import Annotated, Any, Literal, Protocol, runtime_checkable
|
||||
|
||||
from fastapi import Body
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from llama_stack.apis.inference import InterleavedContent
|
||||
|
@ -466,6 +467,40 @@ class VectorStoreFilesListInBatchResponse(BaseModel):
|
|||
has_more: bool = False
|
||||
|
||||
|
||||
# extra_body can be accessed via .model_extra
|
||||
@json_schema_type
|
||||
class OpenAICreateVectorStoreRequestWithExtraBody(BaseModel, extra="allow"):
|
||||
"""Request to create a vector store with extra_body support.
|
||||
|
||||
:param name: (Optional) A name for the vector store
|
||||
:param file_ids: List of file IDs to include in the vector store
|
||||
:param expires_after: (Optional) Expiration policy for the vector store
|
||||
:param chunking_strategy: (Optional) Strategy for splitting files into chunks
|
||||
:param metadata: Set of key-value pairs that can be attached to the vector store
|
||||
"""
|
||||
|
||||
name: str | None = None
|
||||
file_ids: list[str] | None = None
|
||||
expires_after: dict[str, Any] | None = None
|
||||
chunking_strategy: dict[str, Any] | None = None
|
||||
metadata: dict[str, Any] | None = None
|
||||
|
||||
|
||||
# extra_body can be accessed via .model_extra
|
||||
@json_schema_type
|
||||
class OpenAICreateVectorStoreFileBatchRequestWithExtraBody(BaseModel, extra="allow"):
|
||||
"""Request to create a vector store file batch with extra_body support.
|
||||
|
||||
:param file_ids: A list of File IDs that the vector store should use
|
||||
:param attributes: (Optional) Key-value attributes to store with the files
|
||||
:param chunking_strategy: (Optional) The chunking strategy used to chunk the file(s). Defaults to auto
|
||||
"""
|
||||
|
||||
file_ids: list[str]
|
||||
attributes: dict[str, Any] | None = None
|
||||
chunking_strategy: VectorStoreChunkingStrategy | None = None
|
||||
|
||||
|
||||
class VectorDBStore(Protocol):
|
||||
def get_vector_db(self, vector_db_id: str) -> VectorDB | None: ...
|
||||
|
||||
|
@ -516,25 +551,11 @@ class VectorIO(Protocol):
|
|||
@webmethod(route="/vector_stores", method="POST", level=LLAMA_STACK_API_V1)
|
||||
async def openai_create_vector_store(
|
||||
self,
|
||||
name: str | None = None,
|
||||
file_ids: list[str] | None = None,
|
||||
expires_after: dict[str, Any] | None = None,
|
||||
chunking_strategy: dict[str, Any] | None = None,
|
||||
metadata: dict[str, Any] | None = None,
|
||||
embedding_model: str | None = None,
|
||||
embedding_dimension: int | None = 384,
|
||||
provider_id: str | None = None,
|
||||
params: Annotated[OpenAICreateVectorStoreRequestWithExtraBody, Body(...)],
|
||||
) -> VectorStoreObject:
|
||||
"""Creates a vector store.
|
||||
|
||||
:param name: A name for the vector store.
|
||||
:param file_ids: A list of File IDs that the vector store should use. Useful for tools like `file_search` that can access files.
|
||||
:param expires_after: The expiration policy for a vector store.
|
||||
:param chunking_strategy: The chunking strategy used to chunk the file(s). If not set, will use the `auto` strategy.
|
||||
:param metadata: Set of 16 key-value pairs that can be attached to an object.
|
||||
:param embedding_model: The embedding model to use for this vector store.
|
||||
:param embedding_dimension: The dimension of the embedding vectors (default: 384).
|
||||
:param provider_id: The ID of the provider to use for this vector store.
|
||||
Generate an OpenAI-compatible vector store with the given parameters.
|
||||
:returns: A VectorStoreObject representing the created vector store.
|
||||
"""
|
||||
...
|
||||
|
@ -827,16 +848,12 @@ class VectorIO(Protocol):
|
|||
async def openai_create_vector_store_file_batch(
|
||||
self,
|
||||
vector_store_id: str,
|
||||
file_ids: list[str],
|
||||
attributes: dict[str, Any] | None = None,
|
||||
chunking_strategy: VectorStoreChunkingStrategy | None = None,
|
||||
params: Annotated[OpenAICreateVectorStoreFileBatchRequestWithExtraBody, Body(...)],
|
||||
) -> VectorStoreFileBatchObject:
|
||||
"""Create a vector store file batch.
|
||||
|
||||
Generate an OpenAI-compatible vector store file batch for the given vector store.
|
||||
:param vector_store_id: The ID of the vector store to create the file batch for.
|
||||
:param file_ids: A list of File IDs that the vector store should use.
|
||||
:param attributes: (Optional) Key-value attributes to store with the files.
|
||||
:param chunking_strategy: (Optional) The chunking strategy used to chunk the file(s). Defaults to auto.
|
||||
:returns: A VectorStoreFileBatchObject representing the created file batch.
|
||||
"""
|
||||
...
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue