chore!: add double routes for v1/openai/v1 (#3636)

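Keep the legacy /v1/openai/v1 routes registered as deprecated aliases of the new /v1 routes, so that users get a deprecation warning in 0.3.0; the aliases will be removed in 0.4.0. Signed-off-by: Sébastien Han <seb@redhat.com>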
2025-12-03 18:00:36 +00:00 · 2025-10-02 16:11:05 +02:00 · 2025-10-02 16:11:05 +02:00 · 4161102100
commit 4161102100
parent f1748e2f92
11 changed files with 12768 additions and 22 deletions
--- a/docs/static/deprecated-llama-stack-spec.html
+++ b/docs/static/deprecated-llama-stack-spec.html
--- a/docs/static/deprecated-llama-stack-spec.yaml
+++ b/docs/static/deprecated-llama-stack-spec.yaml
--- a/docs/static/llama-stack-spec.html
+++ b/docs/static/llama-stack-spec.html
@ -1310,16 +1310,11 @@
            "post": {
                "responses": {
                    "200": {
-                        "description": "An OpenAIResponseObject.",
+                        "description": "A ListOpenAIResponseObject.",
                        "content": {
                            "application/json": {
                                "schema": {
-                                    "$ref": "#/components/schemas/OpenAIResponseObject"
-                                }
-                            },
-                            "text/event-stream": {
-                                "schema": {
-                                    "$ref": "#/components/schemas/OpenAIResponseObjectStream"
+                                    "$ref": "#/components/schemas/ListOpenAIResponseObject"
                                }
                            }
                        }
@ -1340,14 +1335,14 @@
                "tags": [
                    "Agents"
                ],
-                "summary": "Create a new OpenAI response.",
-                "description": "Create a new OpenAI response.",
+                "summary": "List all OpenAI responses.",
+                "description": "List all OpenAI responses.",
                "parameters": [],
                "requestBody": {
                    "content": {
                        "application/json": {
                            "schema": {
-                                "$ref": "#/components/schemas/CreateOpenaiResponseRequest"
+                                "$ref": "#/components/schemas/ListOpenaiResponsesRequest"
                            }
                        }
                    },
@ -8238,6 +8233,33 @@
                ],
                "title": "OpenAIResponseObjectStreamResponseWebSearchCallSearching"
            },
+            "ListOpenaiResponsesRequest": {
+                "type": "object",
+                "properties": {
+                    "after": {
+                        "type": "string",
+                        "description": "The ID of the last response to return."
+                    },
+                    "limit": {
+                        "type": "integer",
+                        "description": "The number of responses to return."
+                    },
+                    "model": {
+                        "type": "string",
+                        "description": "The model to filter responses by."
+                    },
+                    "order": {
+                        "type": "string",
+                        "enum": [
+                            "asc",
+                            "desc"
+                        ],
+                        "description": "The order to sort responses by when sorted by created_at ('asc' or 'desc')."
+                    }
+                },
+                "additionalProperties": false,
+                "title": "ListOpenaiResponsesRequest"
+            },
            "OpenAIDeleteResponseObject": {
                "type": "object",
                "properties": {
--- a/docs/static/llama-stack-spec.yaml
+++ b/docs/static/llama-stack-spec.yaml
@ -967,14 +967,11 @@ paths:
    post:
      responses:
        '200':
-          description: An OpenAIResponseObject.
+          description: A ListOpenAIResponseObject.
          content:
            application/json:
              schema:
-                $ref: '#/components/schemas/OpenAIResponseObject'
-            text/event-stream:
-              schema:
-                $ref: '#/components/schemas/OpenAIResponseObjectStream'
+                $ref: '#/components/schemas/ListOpenAIResponseObject'
        '400':
          $ref: '#/components/responses/BadRequest400'
        '429':
@ -987,14 +984,14 @@ paths:
          $ref: '#/components/responses/DefaultError'
      tags:
        - Agents
-      summary: Create a new OpenAI response.
-      description: Create a new OpenAI response.
+      summary: List all OpenAI responses.
+      description: List all OpenAI responses.
      parameters: []
      requestBody:
        content:
          application/json:
            schema:
-              $ref: '#/components/schemas/CreateOpenaiResponseRequest'
+              $ref: '#/components/schemas/ListOpenaiResponsesRequest'
        required: true
      deprecated: false
  /v1/responses/{response_id}:
@ -6199,6 +6196,27 @@ components:
        - type
      title: >-
        OpenAIResponseObjectStreamResponseWebSearchCallSearching
+    ListOpenaiResponsesRequest:
+      type: object
+      properties:
+        after:
+          type: string
+          description: The ID of the last response to return.
+        limit:
+          type: integer
+          description: The number of responses to return.
+        model:
+          type: string
+          description: The model to filter responses by.
+        order:
+          type: string
+          enum:
+            - asc
+            - desc
+          description: >-
+            The order to sort responses by when sorted by created_at ('asc' or 'desc').
+      additionalProperties: false
+      title: ListOpenaiResponsesRequest
    OpenAIDeleteResponseObject:
      type: object
      properties:
--- a/llama_stack/apis/agents/agents.py
+++ b/llama_stack/apis/agents/agents.py
@ -772,6 +772,12 @@ class Agents(Protocol):
    #
    # Both of these APIs are inherently stateful.

+    @webmethod(
+        route="/openai/v1/responses/{response_id}",
+        method="GET",
+        level=LLAMA_STACK_API_V1,
+        deprecated=True,
+    )
    @webmethod(route="/responses/{response_id}", method="GET", level=LLAMA_STACK_API_V1)
    async def get_openai_response(
        self,
@ -784,6 +790,7 @@ class Agents(Protocol):
        """
        ...

+    @webmethod(route="/openai/v1/responses", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
    @webmethod(route="/responses", method="POST", level=LLAMA_STACK_API_V1)
    async def create_openai_response(
        self,
@ -809,6 +816,7 @@ class Agents(Protocol):
        """
        ...

+    @webmethod(route="/openai/v1/responses", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
    @webmethod(route="/responses", method="GET", level=LLAMA_STACK_API_V1)
    async def list_openai_responses(
        self,
@ -828,10 +836,9 @@ class Agents(Protocol):
        ...

    @webmethod(
-        route="/responses/{response_id}/input_items",
-        method="GET",
-        level=LLAMA_STACK_API_V1,
+        route="/openai/v1/responses/{response_id}/input_items", method="GET", level=LLAMA_STACK_API_V1, deprecated=True
    )
+    @webmethod(route="/responses/{response_id}/input_items", method="GET", level=LLAMA_STACK_API_V1)
    async def list_openai_response_input_items(
        self,
        response_id: str,
@ -853,6 +860,7 @@ class Agents(Protocol):
        """
        ...

+    @webmethod(route="/openai/v1/responses/{response_id}", method="DELETE", level=LLAMA_STACK_API_V1, deprecated=True)
    @webmethod(route="/responses/{response_id}", method="DELETE", level=LLAMA_STACK_API_V1)
    async def delete_openai_response(self, response_id: str) -> OpenAIDeleteResponseObject:
        """Delete an OpenAI response by its ID.
--- a/llama_stack/apis/batches/batches.py
+++ b/llama_stack/apis/batches/batches.py
@ -43,6 +43,7 @@ class Batches(Protocol):
    Note: This API is currently under active development and may undergo changes.
    """

+    @webmethod(route="/openai/v1/batches", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
    @webmethod(route="/batches", method="POST", level=LLAMA_STACK_API_V1)
    async def create_batch(
        self,
@ -63,6 +64,7 @@ class Batches(Protocol):
        """
        ...

+    @webmethod(route="/openai/v1/batches/{batch_id}", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
    @webmethod(route="/batches/{batch_id}", method="GET", level=LLAMA_STACK_API_V1)
    async def retrieve_batch(self, batch_id: str) -> BatchObject:
        """Retrieve information about a specific batch.
@ -72,6 +74,7 @@ class Batches(Protocol):
        """
        ...

+    @webmethod(route="/openai/v1/batches/{batch_id}/cancel", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
    @webmethod(route="/batches/{batch_id}/cancel", method="POST", level=LLAMA_STACK_API_V1)
    async def cancel_batch(self, batch_id: str) -> BatchObject:
        """Cancel a batch that is in progress.
@ -81,6 +84,7 @@ class Batches(Protocol):
        """
        ...

+    @webmethod(route="/openai/v1/batches", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
    @webmethod(route="/batches", method="GET", level=LLAMA_STACK_API_V1)
    async def list_batches(
        self,
--- a/llama_stack/apis/files/files.py
+++ b/llama_stack/apis/files/files.py
@ -105,6 +105,7 @@ class OpenAIFileDeleteResponse(BaseModel):
@trace_protocol
 class Files(Protocol):
    # OpenAI Files API Endpoints
+    @webmethod(route="/openai/v1/files", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
    @webmethod(route="/files", method="POST", level=LLAMA_STACK_API_V1)
    async def openai_upload_file(
        self,
@ -127,6 +128,7 @@ class Files(Protocol):
        """
        ...

+    @webmethod(route="/openai/v1/files", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
    @webmethod(route="/files", method="GET", level=LLAMA_STACK_API_V1)
    async def openai_list_files(
        self,
@ -146,6 +148,7 @@ class Files(Protocol):
        """
        ...

+    @webmethod(route="/openai/v1/files/{file_id}", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
    @webmethod(route="/files/{file_id}", method="GET", level=LLAMA_STACK_API_V1)
    async def openai_retrieve_file(
        self,
@ -159,6 +162,7 @@ class Files(Protocol):
        """
        ...

+    @webmethod(route="/openai/v1/files/{file_id}", method="DELETE", level=LLAMA_STACK_API_V1, deprecated=True)
    @webmethod(route="/files/{file_id}", method="DELETE", level=LLAMA_STACK_API_V1)
    async def openai_delete_file(
        self,
@ -172,6 +176,7 @@ class Files(Protocol):
        """
        ...

+    @webmethod(route="/openai/v1/files/{file_id}/content", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
    @webmethod(route="/files/{file_id}/content", method="GET", level=LLAMA_STACK_API_V1)
    async def openai_retrieve_file_content(
        self,
--- a/llama_stack/apis/inference/inference.py
+++ b/llama_stack/apis/inference/inference.py
@ -1066,6 +1066,7 @@ class InferenceProvider(Protocol):
        raise NotImplementedError("Reranking is not implemented")
        return  # this is so mypy's safe-super rule will consider the method concrete

+    @webmethod(route="/openai/v1/completions", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
    @webmethod(route="/completions", method="POST", level=LLAMA_STACK_API_V1)
    async def openai_completion(
        self,
@ -1117,6 +1118,7 @@ class InferenceProvider(Protocol):
        """
        ...

+    @webmethod(route="/openai/v1/chat/completions", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
    @webmethod(route="/chat/completions", method="POST", level=LLAMA_STACK_API_V1)
    async def openai_chat_completion(
        self,
@ -1173,6 +1175,7 @@ class InferenceProvider(Protocol):
        """
        ...

+    @webmethod(route="/openai/v1/embeddings", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
    @webmethod(route="/embeddings", method="POST", level=LLAMA_STACK_API_V1)
    async def openai_embeddings(
        self,
@ -1202,6 +1205,7 @@ class Inference(InferenceProvider):
    - Embedding models: these models generate embeddings to be used for semantic search.
    """

+    @webmethod(route="/openai/v1/chat/completions", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
    @webmethod(route="/chat/completions", method="GET", level=LLAMA_STACK_API_V1)
    async def list_chat_completions(
        self,
@ -1220,6 +1224,9 @@ class Inference(InferenceProvider):
        """
        raise NotImplementedError("List chat completions is not implemented")

+    @webmethod(
+        route="/openai/v1/chat/completions/{completion_id}", method="GET", level=LLAMA_STACK_API_V1, deprecated=True
+    )
    @webmethod(route="/chat/completions/{completion_id}", method="GET", level=LLAMA_STACK_API_V1)
    async def get_chat_completion(self, completion_id: str) -> OpenAICompletionWithInputMessages:
        """Describe a chat completion by its ID.
--- a/llama_stack/apis/models/models.py
+++ b/llama_stack/apis/models/models.py
@ -111,6 +111,14 @@ class Models(Protocol):
        """
        ...

+    @webmethod(route="/openai/v1/models", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
+    async def openai_list_models(self) -> OpenAIListModelsResponse:
+        """List models using the OpenAI API (served on the deprecated /openai/v1/models route).
+
+        :returns: An OpenAIListModelsResponse.
+        """
+        ...
+
    @webmethod(route="/models/{model_id:path}", method="GET", level=LLAMA_STACK_API_V1)
    async def get_model(
        self,
--- a/llama_stack/apis/safety/safety.py
+++ b/llama_stack/apis/safety/safety.py
@ -114,6 +114,7 @@ class Safety(Protocol):
        """
        ...

+    @webmethod(route="/openai/v1/moderations", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
    @webmethod(route="/moderations", method="POST", level=LLAMA_STACK_API_V1)
    async def run_moderation(self, input: str | list[str], model: str) -> ModerationObject:
        """Classifies if text and/or image inputs are potentially harmful.
--- a/llama_stack/apis/vector_io/vector_io.py
+++ b/llama_stack/apis/vector_io/vector_io.py
@ -512,6 +512,7 @@ class VectorIO(Protocol):
        ...

    # OpenAI Vector Stores API endpoints
+    @webmethod(route="/openai/v1/vector_stores", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
    @webmethod(route="/vector_stores", method="POST", level=LLAMA_STACK_API_V1)
    async def openai_create_vector_store(
        self,
@ -538,6 +539,7 @@ class VectorIO(Protocol):
        """
        ...

+    @webmethod(route="/openai/v1/vector_stores", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
    @webmethod(route="/vector_stores", method="GET", level=LLAMA_STACK_API_V1)
    async def openai_list_vector_stores(
        self,
@ -556,6 +558,9 @@ class VectorIO(Protocol):
        """
        ...

+    @webmethod(
+        route="/openai/v1/vector_stores/{vector_store_id}", method="GET", level=LLAMA_STACK_API_V1, deprecated=True
+    )
    @webmethod(route="/vector_stores/{vector_store_id}", method="GET", level=LLAMA_STACK_API_V1)
    async def openai_retrieve_vector_store(
        self,
@ -568,6 +573,9 @@ class VectorIO(Protocol):
        """
        ...

+    @webmethod(
+        route="/openai/v1/vector_stores/{vector_store_id}", method="POST", level=LLAMA_STACK_API_V1, deprecated=True
+    )
    @webmethod(
        route="/vector_stores/{vector_store_id}",
        method="POST",
@ -590,6 +598,9 @@ class VectorIO(Protocol):
        """
        ...

+    @webmethod(
+        route="/openai/v1/vector_stores/{vector_store_id}", method="DELETE", level=LLAMA_STACK_API_V1, deprecated=True
+    )
    @webmethod(
        route="/vector_stores/{vector_store_id}",
        method="DELETE",
@ -606,6 +617,12 @@ class VectorIO(Protocol):
        """
        ...

+    @webmethod(
+        route="/openai/v1/vector_stores/{vector_store_id}/search",
+        method="POST",
+        level=LLAMA_STACK_API_V1,
+        deprecated=True,
+    )
    @webmethod(
        route="/vector_stores/{vector_store_id}/search",
        method="POST",
@ -638,6 +655,12 @@ class VectorIO(Protocol):
        """
        ...

+    @webmethod(
+        route="/openai/v1/vector_stores/{vector_store_id}/files",
+        method="POST",
+        level=LLAMA_STACK_API_V1,
+        deprecated=True,
+    )
    @webmethod(
        route="/vector_stores/{vector_store_id}/files",
        method="POST",
@ -660,6 +683,12 @@ class VectorIO(Protocol):
        """
        ...

+    @webmethod(
+        route="/openai/v1/vector_stores/{vector_store_id}/files",
+        method="GET",
+        level=LLAMA_STACK_API_V1,
+        deprecated=True,
+    )
    @webmethod(
        route="/vector_stores/{vector_store_id}/files",
        method="GET",
@ -686,6 +715,12 @@ class VectorIO(Protocol):
        """
        ...

+    @webmethod(
+        route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}",
+        method="GET",
+        level=LLAMA_STACK_API_V1,
+        deprecated=True,
+    )
    @webmethod(
        route="/vector_stores/{vector_store_id}/files/{file_id}",
        method="GET",
@ -704,6 +739,12 @@ class VectorIO(Protocol):
        """
        ...

+    @webmethod(
+        route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}/content",
+        method="GET",
+        level=LLAMA_STACK_API_V1,
+        deprecated=True,
+    )
    @webmethod(
        route="/vector_stores/{vector_store_id}/files/{file_id}/content",
        method="GET",
@ -722,6 +763,12 @@ class VectorIO(Protocol):
        """
        ...

+    @webmethod(
+        route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}",
+        method="POST",
+        level=LLAMA_STACK_API_V1,
+        deprecated=True,
+    )
    @webmethod(
        route="/vector_stores/{vector_store_id}/files/{file_id}",
        method="POST",
@ -742,6 +789,12 @@ class VectorIO(Protocol):
        """
        ...

+    @webmethod(
+        route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}",
+        method="DELETE",
+        level=LLAMA_STACK_API_V1,
+        deprecated=True,
+    )
    @webmethod(
        route="/vector_stores/{vector_store_id}/files/{file_id}",
        method="DELETE",
@ -765,6 +818,12 @@ class VectorIO(Protocol):
        method="POST",
        level=LLAMA_STACK_API_V1,
    )
+    @webmethod(
+        route="/openai/v1/vector_stores/{vector_store_id}/file_batches",
+        method="POST",
+        level=LLAMA_STACK_API_V1,
+        deprecated=True,
+    )
    async def openai_create_vector_store_file_batch(
        self,
        vector_store_id: str,
@ -787,6 +846,12 @@ class VectorIO(Protocol):
        method="GET",
        level=LLAMA_STACK_API_V1,
    )
+    @webmethod(
+        route="/openai/v1/vector_stores/{vector_store_id}/file_batches/{batch_id}",
+        method="GET",
+        level=LLAMA_STACK_API_V1,
+        deprecated=True,
+    )
    async def openai_retrieve_vector_store_file_batch(
        self,
        batch_id: str,
@ -800,6 +865,12 @@ class VectorIO(Protocol):
        """
        ...

+    @webmethod(
+        route="/openai/v1/vector_stores/{vector_store_id}/file_batches/{batch_id}/files",
+        method="GET",
+        level=LLAMA_STACK_API_V1,
+        deprecated=True,
+    )
    @webmethod(
        route="/vector_stores/{vector_store_id}/file_batches/{batch_id}/files",
        method="GET",
@ -828,6 +899,12 @@ class VectorIO(Protocol):
        """
        ...

+    @webmethod(
+        route="/openai/v1/vector_stores/{vector_store_id}/file_batches/{batch_id}/cancel",
+        method="POST",
+        level=LLAMA_STACK_API_V1,
+        deprecated=True,
+    )
    @webmethod(
        route="/vector_stores/{vector_store_id}/file_batches/{batch_id}/cancel",
        method="POST",