Merge branch 'main' into remove-deprecated-chat-completion

2025-10-06 04:34:57 +00:00 · 2025-10-02 18:19:06 -04:00 · 2025-10-02 18:19:06 -04:00 · ee6a502289
commit ee6a502289
parent f754e1b65b ef0736527d
209 changed files with 109297 additions and 8828 deletions
--- a/llama_stack/apis/agents/agents.py
+++ b/llama_stack/apis/agents/agents.py
@ -772,6 +772,12 @@ class Agents(Protocol):
    #
    # Both of these APIs are inherently stateful.

+    @webmethod(
+        route="/openai/v1/responses/{response_id}",
+        method="GET",
+        level=LLAMA_STACK_API_V1,
+        deprecated=True,
+    )
    @webmethod(route="/responses/{response_id}", method="GET", level=LLAMA_STACK_API_V1)
    async def get_openai_response(
        self,
@ -784,6 +790,7 @@ class Agents(Protocol):
        """
        ...

+    @webmethod(route="/openai/v1/responses", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
    @webmethod(route="/responses", method="POST", level=LLAMA_STACK_API_V1)
    async def create_openai_response(
        self,
@ -809,6 +816,7 @@ class Agents(Protocol):
        """
        ...

+    @webmethod(route="/openai/v1/responses", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
    @webmethod(route="/responses", method="GET", level=LLAMA_STACK_API_V1)
    async def list_openai_responses(
        self,
@ -828,10 +836,9 @@ class Agents(Protocol):
        ...

    @webmethod(
-        route="/responses/{response_id}/input_items",
-        method="GET",
-        level=LLAMA_STACK_API_V1,
+        route="/openai/v1/responses/{response_id}/input_items", method="GET", level=LLAMA_STACK_API_V1, deprecated=True
    )
+    @webmethod(route="/responses/{response_id}/input_items", method="GET", level=LLAMA_STACK_API_V1)
    async def list_openai_response_input_items(
        self,
        response_id: str,
@ -853,6 +860,7 @@ class Agents(Protocol):
        """
        ...

+    @webmethod(route="/openai/v1/responses/{response_id}", method="DELETE", level=LLAMA_STACK_API_V1, deprecated=True)
    @webmethod(route="/responses/{response_id}", method="DELETE", level=LLAMA_STACK_API_V1)
    async def delete_openai_response(self, response_id: str) -> OpenAIDeleteResponseObject:
        """Delete an OpenAI response by its ID.
--- a/llama_stack/apis/batches/batches.py
+++ b/llama_stack/apis/batches/batches.py
@ -43,6 +43,7 @@ class Batches(Protocol):
    Note: This API is currently under active development and may undergo changes.
    """

+    @webmethod(route="/openai/v1/batches", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
    @webmethod(route="/batches", method="POST", level=LLAMA_STACK_API_V1)
    async def create_batch(
        self,
@ -63,6 +64,7 @@ class Batches(Protocol):
        """
        ...

+    @webmethod(route="/openai/v1/batches/{batch_id}", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
    @webmethod(route="/batches/{batch_id}", method="GET", level=LLAMA_STACK_API_V1)
    async def retrieve_batch(self, batch_id: str) -> BatchObject:
        """Retrieve information about a specific batch.
@ -72,6 +74,7 @@ class Batches(Protocol):
        """
        ...

+    @webmethod(route="/openai/v1/batches/{batch_id}/cancel", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
    @webmethod(route="/batches/{batch_id}/cancel", method="POST", level=LLAMA_STACK_API_V1)
    async def cancel_batch(self, batch_id: str) -> BatchObject:
        """Cancel a batch that is in progress.
@ -81,6 +84,7 @@ class Batches(Protocol):
        """
        ...

+    @webmethod(route="/openai/v1/batches", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
    @webmethod(route="/batches", method="GET", level=LLAMA_STACK_API_V1)
    async def list_batches(
        self,
--- a/llama_stack/apis/files/files.py
+++ b/llama_stack/apis/files/files.py
@ -105,6 +105,7 @@ class OpenAIFileDeleteResponse(BaseModel):
@trace_protocol
 class Files(Protocol):
    # OpenAI Files API Endpoints
+    @webmethod(route="/openai/v1/files", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
    @webmethod(route="/files", method="POST", level=LLAMA_STACK_API_V1)
    async def openai_upload_file(
        self,
@ -127,6 +128,7 @@ class Files(Protocol):
        """
        ...

+    @webmethod(route="/openai/v1/files", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
    @webmethod(route="/files", method="GET", level=LLAMA_STACK_API_V1)
    async def openai_list_files(
        self,
@ -146,6 +148,7 @@ class Files(Protocol):
        """
        ...

+    @webmethod(route="/openai/v1/files/{file_id}", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
    @webmethod(route="/files/{file_id}", method="GET", level=LLAMA_STACK_API_V1)
    async def openai_retrieve_file(
        self,
@ -159,6 +162,7 @@ class Files(Protocol):
        """
        ...

+    @webmethod(route="/openai/v1/files/{file_id}", method="DELETE", level=LLAMA_STACK_API_V1, deprecated=True)
    @webmethod(route="/files/{file_id}", method="DELETE", level=LLAMA_STACK_API_V1)
    async def openai_delete_file(
        self,
@ -172,6 +176,7 @@ class Files(Protocol):
        """
        ...

+    @webmethod(route="/openai/v1/files/{file_id}/content", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
    @webmethod(route="/files/{file_id}/content", method="GET", level=LLAMA_STACK_API_V1)
    async def openai_retrieve_file_content(
        self,
--- a/llama_stack/apis/inference/inference.py
+++ b/llama_stack/apis/inference/inference.py
@ -27,14 +27,12 @@ from llama_stack.models.llama.datatypes import (
    StopReason,
    ToolCall,
    ToolDefinition,
-    ToolParamDefinition,
    ToolPromptFormat,
 )
 from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
 from llama_stack.schema_utils import json_schema_type, register_schema, webmethod

 register_schema(ToolCall)
-register_schema(ToolParamDefinition)
 register_schema(ToolDefinition)

 from enum import StrEnum
@ -1027,6 +1025,7 @@ class InferenceProvider(Protocol):
        raise NotImplementedError("Reranking is not implemented")
        return  # this is so mypy's safe-super rule will consider the method concrete

+    @webmethod(route="/openai/v1/completions", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
    @webmethod(route="/completions", method="POST", level=LLAMA_STACK_API_V1)
    async def openai_completion(
        self,
@ -1078,6 +1077,7 @@ class InferenceProvider(Protocol):
        """
        ...

+    @webmethod(route="/openai/v1/chat/completions", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
    @webmethod(route="/chat/completions", method="POST", level=LLAMA_STACK_API_V1)
    async def openai_chat_completion(
        self,
@ -1134,6 +1134,7 @@ class InferenceProvider(Protocol):
        """
        ...

+    @webmethod(route="/openai/v1/embeddings", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
    @webmethod(route="/embeddings", method="POST", level=LLAMA_STACK_API_V1)
    async def openai_embeddings(
        self,
@ -1163,6 +1164,7 @@ class Inference(InferenceProvider):
    - Embedding models: these models generate embeddings to be used for semantic search.
    """

+    @webmethod(route="/openai/v1/chat/completions", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
    @webmethod(route="/chat/completions", method="GET", level=LLAMA_STACK_API_V1)
    async def list_chat_completions(
        self,
@ -1181,6 +1183,9 @@ class Inference(InferenceProvider):
        """
        raise NotImplementedError("List chat completions is not implemented")

+    @webmethod(
+        route="/openai/v1/chat/completions/{completion_id}", method="GET", level=LLAMA_STACK_API_V1, deprecated=True
+    )
    @webmethod(route="/chat/completions/{completion_id}", method="GET", level=LLAMA_STACK_API_V1)
    async def get_chat_completion(self, completion_id: str) -> OpenAICompletionWithInputMessages:
        """Describe a chat completion by its ID.
--- a/llama_stack/apis/models/models.py
+++ b/llama_stack/apis/models/models.py
@ -111,6 +111,14 @@ class Models(Protocol):
        """
        ...

+    @webmethod(route="/openai/v1/models", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
+    async def openai_list_models(self) -> OpenAIListModelsResponse:
+        """List models using the OpenAI API.
+
+        :returns: An OpenAIListModelsResponse.
+        """
+        ...
+
    @webmethod(route="/models/{model_id:path}", method="GET", level=LLAMA_STACK_API_V1)
    async def get_model(
        self,
--- a/llama_stack/apis/safety/safety.py
+++ b/llama_stack/apis/safety/safety.py
@ -114,6 +114,7 @@ class Safety(Protocol):
        """
        ...

+    @webmethod(route="/openai/v1/moderations", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
    @webmethod(route="/moderations", method="POST", level=LLAMA_STACK_API_V1)
    async def run_moderation(self, input: str | list[str], model: str) -> ModerationObject:
        """Classifies if text and/or image inputs are potentially harmful.
--- a/llama_stack/apis/tools/tools.py
+++ b/llama_stack/apis/tools/tools.py
@ -7,7 +7,7 @@
 from enum import Enum
 from typing import Any, Literal, Protocol

-from pydantic import BaseModel, Field
+from pydantic import BaseModel
 from typing_extensions import runtime_checkable

 from llama_stack.apis.common.content_types import URL, InterleavedContent
@ -19,59 +19,23 @@ from llama_stack.schema_utils import json_schema_type, webmethod
 from .rag_tool import RAGToolRuntime


-@json_schema_type
-class ToolParameter(BaseModel):
-    """Parameter definition for a tool.
-
-    :param name: Name of the parameter
-    :param parameter_type: Type of the parameter (e.g., string, integer)
-    :param description: Human-readable description of what the parameter does
-    :param required: Whether this parameter is required for tool invocation
-    :param items: Type of the elements when parameter_type is array
-    :param title: (Optional) Title of the parameter
-    :param default: (Optional) Default value for the parameter if not provided
-    """
-
-    name: str
-    parameter_type: str
-    description: str
-    required: bool = Field(default=True)
-    items: dict | None = None
-    title: str | None = None
-    default: Any | None = None
-
-
-@json_schema_type
-class Tool(Resource):
-    """A tool that can be invoked by agents.
-
-    :param type: Type of resource, always 'tool'
-    :param toolgroup_id: ID of the tool group this tool belongs to
-    :param description: Human-readable description of what the tool does
-    :param parameters: List of parameters this tool accepts
-    :param metadata: (Optional) Additional metadata about the tool
-    """
-
-    type: Literal[ResourceType.tool] = ResourceType.tool
-    toolgroup_id: str
-    description: str
-    parameters: list[ToolParameter]
-    metadata: dict[str, Any] | None = None
-
-
@json_schema_type
 class ToolDef(BaseModel):
    """Tool definition used in runtime contexts.

    :param name: Name of the tool
    :param description: (Optional) Human-readable description of what the tool does
-    :param parameters: (Optional) List of parameters this tool accepts
+    :param input_schema: (Optional) JSON Schema for tool inputs (MCP inputSchema)
+    :param output_schema: (Optional) JSON Schema for tool outputs (MCP outputSchema)
    :param metadata: (Optional) Additional metadata about the tool
+    :param toolgroup_id: (Optional) ID of the tool group this tool belongs to
    """

+    toolgroup_id: str | None = None
    name: str
    description: str | None = None
-    parameters: list[ToolParameter] | None = None
+    input_schema: dict[str, Any] | None = None
+    output_schema: dict[str, Any] | None = None
    metadata: dict[str, Any] | None = None


@ -122,7 +86,7 @@ class ToolInvocationResult(BaseModel):


 class ToolStore(Protocol):
-    async def get_tool(self, tool_name: str) -> Tool: ...
+    async def get_tool(self, tool_name: str) -> ToolDef: ...
    async def get_tool_group(self, toolgroup_id: str) -> ToolGroup: ...


@ -135,15 +99,6 @@ class ListToolGroupsResponse(BaseModel):
    data: list[ToolGroup]


-class ListToolsResponse(BaseModel):
-    """Response containing a list of tools.
-
-    :param data: List of tools
-    """
-
-    data: list[Tool]
-
-
 class ListToolDefsResponse(BaseModel):
    """Response containing a list of tool definitions.

@ -194,11 +149,11 @@ class ToolGroups(Protocol):
        ...

    @webmethod(route="/tools", method="GET", level=LLAMA_STACK_API_V1)
-    async def list_tools(self, toolgroup_id: str | None = None) -> ListToolsResponse:
+    async def list_tools(self, toolgroup_id: str | None = None) -> ListToolDefsResponse:
        """List tools, optionally filtered by tool group.

        :param toolgroup_id: The ID of the tool group to list tools for.
-        :returns: A ListToolsResponse.
+        :returns: A ListToolDefsResponse.
        """
        ...

@ -206,11 +161,11 @@ class ToolGroups(Protocol):
    async def get_tool(
        self,
        tool_name: str,
-    ) -> Tool:
+    ) -> ToolDef:
        """Get a tool by its name.

        :param tool_name: The name of the tool to get.
-        :returns: A Tool.
+        :returns: A ToolDef.
        """
        ...

--- a/llama_stack/apis/vector_io/vector_io.py
+++ b/llama_stack/apis/vector_io/vector_io.py
@ -512,6 +512,7 @@ class VectorIO(Protocol):
        ...

    # OpenAI Vector Stores API endpoints
+    @webmethod(route="/openai/v1/vector_stores", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
    @webmethod(route="/vector_stores", method="POST", level=LLAMA_STACK_API_V1)
    async def openai_create_vector_store(
        self,
@ -538,6 +539,7 @@ class VectorIO(Protocol):
        """
        ...

+    @webmethod(route="/openai/v1/vector_stores", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
    @webmethod(route="/vector_stores", method="GET", level=LLAMA_STACK_API_V1)
    async def openai_list_vector_stores(
        self,
@ -556,6 +558,9 @@ class VectorIO(Protocol):
        """
        ...

+    @webmethod(
+        route="/openai/v1/vector_stores/{vector_store_id}", method="GET", level=LLAMA_STACK_API_V1, deprecated=True
+    )
    @webmethod(route="/vector_stores/{vector_store_id}", method="GET", level=LLAMA_STACK_API_V1)
    async def openai_retrieve_vector_store(
        self,
@ -568,6 +573,9 @@ class VectorIO(Protocol):
        """
        ...

+    @webmethod(
+        route="/openai/v1/vector_stores/{vector_store_id}", method="POST", level=LLAMA_STACK_API_V1, deprecated=True
+    )
    @webmethod(
        route="/vector_stores/{vector_store_id}",
        method="POST",
@ -590,6 +598,9 @@ class VectorIO(Protocol):
        """
        ...

+    @webmethod(
+        route="/openai/v1/vector_stores/{vector_store_id}", method="DELETE", level=LLAMA_STACK_API_V1, deprecated=True
+    )
    @webmethod(
        route="/vector_stores/{vector_store_id}",
        method="DELETE",
@ -606,6 +617,12 @@ class VectorIO(Protocol):
        """
        ...

+    @webmethod(
+        route="/openai/v1/vector_stores/{vector_store_id}/search",
+        method="POST",
+        level=LLAMA_STACK_API_V1,
+        deprecated=True,
+    )
    @webmethod(
        route="/vector_stores/{vector_store_id}/search",
        method="POST",
@ -638,6 +655,12 @@ class VectorIO(Protocol):
        """
        ...

+    @webmethod(
+        route="/openai/v1/vector_stores/{vector_store_id}/files",
+        method="POST",
+        level=LLAMA_STACK_API_V1,
+        deprecated=True,
+    )
    @webmethod(
        route="/vector_stores/{vector_store_id}/files",
        method="POST",
@ -660,6 +683,12 @@ class VectorIO(Protocol):
        """
        ...

+    @webmethod(
+        route="/openai/v1/vector_stores/{vector_store_id}/files",
+        method="GET",
+        level=LLAMA_STACK_API_V1,
+        deprecated=True,
+    )
    @webmethod(
        route="/vector_stores/{vector_store_id}/files",
        method="GET",
@ -686,6 +715,12 @@ class VectorIO(Protocol):
        """
        ...

+    @webmethod(
+        route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}",
+        method="GET",
+        level=LLAMA_STACK_API_V1,
+        deprecated=True,
+    )
    @webmethod(
        route="/vector_stores/{vector_store_id}/files/{file_id}",
        method="GET",
@ -704,6 +739,12 @@ class VectorIO(Protocol):
        """
        ...

+    @webmethod(
+        route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}/content",
+        method="GET",
+        level=LLAMA_STACK_API_V1,
+        deprecated=True,
+    )
    @webmethod(
        route="/vector_stores/{vector_store_id}/files/{file_id}/content",
        method="GET",
@ -722,6 +763,12 @@ class VectorIO(Protocol):
        """
        ...

+    @webmethod(
+        route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}",
+        method="POST",
+        level=LLAMA_STACK_API_V1,
+        deprecated=True,
+    )
    @webmethod(
        route="/vector_stores/{vector_store_id}/files/{file_id}",
        method="POST",
@ -742,6 +789,12 @@ class VectorIO(Protocol):
        """
        ...

+    @webmethod(
+        route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}",
+        method="DELETE",
+        level=LLAMA_STACK_API_V1,
+        deprecated=True,
+    )
    @webmethod(
        route="/vector_stores/{vector_store_id}/files/{file_id}",
        method="DELETE",
@ -765,6 +818,12 @@ class VectorIO(Protocol):
        method="POST",
        level=LLAMA_STACK_API_V1,
    )
+    @webmethod(
+        route="/openai/v1/vector_stores/{vector_store_id}/file_batches",
+        method="POST",
+        level=LLAMA_STACK_API_V1,
+        deprecated=True,
+    )
    async def openai_create_vector_store_file_batch(
        self,
        vector_store_id: str,
@ -787,6 +846,12 @@ class VectorIO(Protocol):
        method="GET",
        level=LLAMA_STACK_API_V1,
    )
+    @webmethod(
+        route="/openai/v1/vector_stores/{vector_store_id}/file_batches/{batch_id}",
+        method="GET",
+        level=LLAMA_STACK_API_V1,
+        deprecated=True,
+    )
    async def openai_retrieve_vector_store_file_batch(
        self,
        batch_id: str,
@ -800,6 +865,12 @@ class VectorIO(Protocol):
        """
        ...

+    @webmethod(
+        route="/openai/v1/vector_stores/{vector_store_id}/file_batches/{batch_id}/files",
+        method="GET",
+        level=LLAMA_STACK_API_V1,
+        deprecated=True,
+    )
    @webmethod(
        route="/vector_stores/{vector_store_id}/file_batches/{batch_id}/files",
        method="GET",
@ -828,6 +899,12 @@ class VectorIO(Protocol):
        """
        ...

+    @webmethod(
+        route="/openai/v1/vector_stores/{vector_store_id}/file_batches/{batch_id}/cancel",
+        method="POST",
+        level=LLAMA_STACK_API_V1,
+        deprecated=True,
+    )
    @webmethod(
        route="/vector_stores/{vector_store_id}/file_batches/{batch_id}/cancel",
        method="POST",