Merge remote-tracking branch 'origin/main' into dependabot/uv/openai-2.5.0

2025-12-13 04:22:35 +00:00 · 2025-10-22 12:17:03 -07:00 · 2025-10-22 12:17:03 -07:00 · 13450c1a68
commit 13450c1a68
parent 090fa7007e bb1ebb3c6b
317 changed files with 86802 additions and 18957 deletions
--- a/llama_stack/apis/agents/openai_responses.py
+++ b/llama_stack/apis/agents/openai_responses.py
@@ -545,6 +545,7 @@ class OpenAIResponseObject(BaseModel):
    :param tools: (Optional) An array of tools the model may call while generating a response.
    :param truncation: (Optional) Truncation strategy applied to the response
    :param usage: (Optional) Token usage information for the response
+    :param instructions: (Optional) System message inserted into the model's context
    """

    created_at: int
@@ -564,6 +565,7 @@ class OpenAIResponseObject(BaseModel):
    tools: list[OpenAIResponseTool] | None = None
    truncation: str | None = None
    usage: OpenAIResponseUsage | None = None
+    instructions: str | None = None


@json_schema_type
--- a/llama_stack/apis/datatypes.py
+++ b/llama_stack/apis/datatypes.py
@@ -121,6 +121,7 @@ class Api(Enum, metaclass=DynamicApiMeta):

    models = "models"
    shields = "shields"
+    vector_stores = "vector_stores"  # only used for routing table
    datasets = "datasets"
    scoring_functions = "scoring_functions"
    benchmarks = "benchmarks"
--- a/llama_stack/apis/inference/inference.py
+++ b/llama_stack/apis/inference/inference.py
@@ -1234,9 +1234,10 @@ class Inference(InferenceProvider):

    Llama Stack Inference API for generating completions, chat completions, and embeddings.

-    This API provides the raw interface to the underlying models. Two kinds of models are supported:
+    This API provides the raw interface to the underlying models. Three kinds of models are supported:
    - LLM models: these models generate "raw" and "chat" (conversational) completions.
    - Embedding models: these models generate embeddings to be used for semantic search.
+    - Rerank models: these models reorder the documents based on their relevance to a query.
    """

    @webmethod(route="/openai/v1/chat/completions", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
--- a/llama_stack/apis/models/models.py
+++ b/llama_stack/apis/models/models.py
@@ -27,10 +27,12 @@ class ModelType(StrEnum):
    """Enumeration of supported model types in Llama Stack.
    :cvar llm: Large language model for text generation and completion
    :cvar embedding: Embedding model for converting text to vector representations
+    :cvar rerank: Reranking model for reordering documents based on their relevance to a query
    """

    llm = "llm"
    embedding = "embedding"
+    rerank = "rerank"


@json_schema_type
--- a/llama_stack/apis/resource.py
+++ b/llama_stack/apis/resource.py
@@ -13,7 +13,7 @@ from pydantic import BaseModel, Field
 class ResourceType(StrEnum):
    model = "model"
    shield = "shield"
-    vector_db = "vector_db"
+    vector_store = "vector_store"
    dataset = "dataset"
    scoring_function = "scoring_function"
    benchmark = "benchmark"
@@ -34,4 +34,4 @@ class Resource(BaseModel):

    provider_id: str = Field(description="ID of the provider that owns this resource")

-    type: ResourceType = Field(description="Type of resource (e.g. 'model', 'shield', 'vector_db', etc.)")
+    type: ResourceType = Field(description="Type of resource (e.g. 'model', 'shield', 'vector_store', etc.)")
--- a/llama_stack/apis/vector_io/vector_io.py
+++ b/llama_stack/apis/vector_io/vector_io.py
@@ -15,7 +15,7 @@ from fastapi import Body
 from pydantic import BaseModel, Field

 from llama_stack.apis.inference import InterleavedContent
-from llama_stack.apis.vector_dbs import VectorDB
+from llama_stack.apis.vector_stores import VectorStore
 from llama_stack.apis.version import LLAMA_STACK_API_V1
 from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
 from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id
@@ -140,6 +140,7 @@ class VectorStoreFileCounts(BaseModel):
    total: int


+# TODO: rename this as OpenAIVectorStore
@json_schema_type
 class VectorStoreObject(BaseModel):
    """OpenAI Vector Store object.
@@ -517,17 +518,18 @@ class OpenAICreateVectorStoreFileBatchRequestWithExtraBody(BaseModel, extra="all
    chunking_strategy: VectorStoreChunkingStrategy | None = None


-class VectorDBStore(Protocol):
-    def get_vector_db(self, vector_db_id: str) -> VectorDB | None: ...
+class VectorStoreTable(Protocol):
+    def get_vector_store(self, vector_store_id: str) -> VectorStore | None: ...


@runtime_checkable
@trace_protocol
 class VectorIO(Protocol):
-    vector_db_store: VectorDBStore | None = None
+    vector_store_table: VectorStoreTable | None = None

    # this will just block now until chunks are inserted, but it should
    # probably return a Job instance which can be polled for completion
+    # TODO: rename vector_db_id to vector_store_id once Stainless is working
    @webmethod(route="/vector-io/insert", method="POST", level=LLAMA_STACK_API_V1)
    async def insert_chunks(
        self,
@@ -546,6 +548,7 @@ class VectorIO(Protocol):
        """
        ...

+    # TODO: rename vector_db_id to vector_store_id once Stainless is working
    @webmethod(route="/vector-io/query", method="POST", level=LLAMA_STACK_API_V1)
    async def query_chunks(
        self,
--- a/llama_stack/apis/vector_stores/__init__.py
+++ b/llama_stack/apis/vector_stores/__init__.py
@@ -4,4 +4,4 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

-from .vector_dbs import *
+from .vector_stores import *
--- a/llama_stack/apis/vector_stores/vector_stores.py
+++ b/llama_stack/apis/vector_stores/vector_stores.py
@@ -9,53 +9,43 @@ from typing import Literal
 from pydantic import BaseModel

 from llama_stack.apis.resource import Resource, ResourceType
-from llama_stack.schema_utils import json_schema_type


-@json_schema_type
-class VectorDB(Resource):
+# Internal resource type for storing the vector store routing and other information
+class VectorStore(Resource):
    """Vector database resource for storing and querying vector embeddings.

-    :param type: Type of resource, always 'vector_db' for vector databases
+    :param type: Type of resource, always 'vector_store' for vector stores
    :param embedding_model: Name of the embedding model to use for vector generation
    :param embedding_dimension: Dimension of the embedding vectors
    """

-    type: Literal[ResourceType.vector_db] = ResourceType.vector_db
+    type: Literal[ResourceType.vector_store] = ResourceType.vector_store

    embedding_model: str
    embedding_dimension: int
-    vector_db_name: str | None = None
+    vector_store_name: str | None = None

    @property
-    def vector_db_id(self) -> str:
+    def vector_store_id(self) -> str:
        return self.identifier

    @property
-    def provider_vector_db_id(self) -> str | None:
+    def provider_vector_store_id(self) -> str | None:
        return self.provider_resource_id


-class VectorDBInput(BaseModel):
+class VectorStoreInput(BaseModel):
    """Input parameters for creating or configuring a vector database.

-    :param vector_db_id: Unique identifier for the vector database
+    :param vector_store_id: Unique identifier for the vector store
    :param embedding_model: Name of the embedding model to use for vector generation
    :param embedding_dimension: Dimension of the embedding vectors
-    :param provider_vector_db_id: (Optional) Provider-specific identifier for the vector database
+    :param provider_vector_store_id: (Optional) Provider-specific identifier for the vector store
    """

-    vector_db_id: str
+    vector_store_id: str
    embedding_model: str
    embedding_dimension: int
    provider_id: str | None = None
-    provider_vector_db_id: str | None = None
-
-
-class ListVectorDBsResponse(BaseModel):
-    """Response from listing vector databases.
-
-    :param data: List of vector databases
-    """
-
-    data: list[VectorDB]
+    provider_vector_store_id: str | None = None