From 975ead1d6ab04bcb67645c491593c8dc95b32aab Mon Sep 17 00:00:00 2001 From: Matthew Farrellee Date: Mon, 29 Sep 2025 14:45:09 -0400 Subject: [PATCH 01/13] chore(api): remove deprecated embeddings impls (#3301) # What does this PR do? remove deprecated embeddings implementations --- docs/static/llama-stack-spec.html | 118 ------------------ docs/static/llama-stack-spec.yaml | 101 --------------- llama_stack/apis/inference/inference.py | 22 +--- llama_stack/core/routers/inference.py | 23 ---- .../remote/inference/bedrock/bedrock.py | 36 ------ .../remote/inference/cerebras/cerebras.py | 14 --- .../remote/inference/databricks/databricks.py | 14 --- .../remote/inference/fireworks/fireworks.py | 31 ----- .../remote/inference/nvidia/nvidia.py | 59 --------- .../remote/inference/ollama/ollama.py | 27 ---- .../inference/passthrough/passthrough.py | 22 ---- .../remote/inference/runpod/runpod.py | 10 -- .../providers/remote/inference/tgi/tgi.py | 14 --- .../remote/inference/together/together.py | 26 ---- .../providers/remote/inference/vllm/vllm.py | 30 ----- .../remote/inference/watsonx/watsonx.py | 15 +-- .../utils/inference/embedding_mixin.py | 22 ---- .../utils/inference/litellm_openai_mixin.py | 25 ---- tests/unit/providers/vector_io/test_faiss.py | 26 +--- 19 files changed, 3 insertions(+), 632 deletions(-) diff --git a/docs/static/llama-stack-spec.html b/docs/static/llama-stack-spec.html index 9e28e0f42..7845fb068 100644 --- a/docs/static/llama-stack-spec.html +++ b/docs/static/llama-stack-spec.html @@ -1035,50 +1035,6 @@ ] } }, - "/v1/inference/embeddings": { - "post": { - "responses": { - "200": { - "description": "An array of embeddings, one for each content. Each embedding is a list of floats. The dimensionality of the embedding is model-specific; you can check model metadata using /models/{model_id}.", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/EmbeddingsResponse" - } - } - } - }, - "400": { - "$ref": "#/components/responses/BadRequest400" - }, - "429": { - "$ref": "#/components/responses/TooManyRequests429" - }, - "500": { - "$ref": "#/components/responses/InternalServerError500" - }, - "default": { - "$ref": "#/components/responses/DefaultError" - } - }, - "tags": [ - "Inference" - ], - "summary": "Generate embeddings for content pieces using the specified model.", - "description": "Generate embeddings for content pieces using the specified model.", - "parameters": [], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/EmbeddingsRequest" - } - } - }, - "required": true - } - } - }, "/v1alpha/eval/benchmarks/{benchmark_id}/evaluations": { "post": { "responses": { @@ -10547,80 +10503,6 @@ "title": "OpenAIDeleteResponseObject", "description": "Response object confirming deletion of an OpenAI response." }, - "EmbeddingsRequest": { - "type": "object", - "properties": { - "model_id": { - "type": "string", - "description": "The identifier of the model to use. The model must be an embedding model registered with Llama Stack and available via the /models endpoint." - }, - "contents": { - "oneOf": [ - { - "type": "array", - "items": { - "type": "string" - } - }, - { - "type": "array", - "items": { - "$ref": "#/components/schemas/InterleavedContentItem" - } - } - ], - "description": "List of contents to generate embeddings for. Each content can be a string or an InterleavedContentItem (and hence can be multimodal). The behavior depends on the model and provider. Some models may only support text." 
- }, - "text_truncation": { - "type": "string", - "enum": [ - "none", - "start", - "end" - ], - "description": "(Optional) Config for how to truncate text for embedding when text is longer than the model's max sequence length." - }, - "output_dimension": { - "type": "integer", - "description": "(Optional) Output dimensionality for the embeddings. Only supported by Matryoshka models." - }, - "task_type": { - "type": "string", - "enum": [ - "query", - "document" - ], - "description": "(Optional) How is the embedding being used? This is only supported by asymmetric embedding models." - } - }, - "additionalProperties": false, - "required": [ - "model_id", - "contents" - ], - "title": "EmbeddingsRequest" - }, - "EmbeddingsResponse": { - "type": "object", - "properties": { - "embeddings": { - "type": "array", - "items": { - "type": "array", - "items": { - "type": "number" - } - }, - "description": "List of embedding vectors, one per input content. Each embedding is a list of floats. The dimensionality of the embedding is model-specific; you can check model metadata using /models/{model_id}" - } - }, - "additionalProperties": false, - "required": [ - "embeddings" - ], - "title": "EmbeddingsResponse", - "description": "Response containing generated embeddings." - }, "AgentCandidate": { "type": "object", "properties": { diff --git a/docs/static/llama-stack-spec.yaml b/docs/static/llama-stack-spec.yaml index 1c06c74a5..8cbbccaa2 100644 --- a/docs/static/llama-stack-spec.yaml +++ b/docs/static/llama-stack-spec.yaml @@ -720,41 +720,6 @@ paths: required: true schema: type: string - /v1/inference/embeddings: - post: - responses: - '200': - description: >- - An array of embeddings, one for each content. Each embedding is a list - of floats. The dimensionality of the embedding is model-specific; you - can check model metadata using /models/{model_id}. - content: - application/json: - schema: - $ref: '#/components/schemas/EmbeddingsResponse' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Inference - summary: >- - Generate embeddings for content pieces using the specified model. - description: >- - Generate embeddings for content pieces using the specified model. - parameters: [] - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/EmbeddingsRequest' - required: true /v1alpha/eval/benchmarks/{benchmark_id}/evaluations: post: responses: @@ -7795,72 +7760,6 @@ components: title: OpenAIDeleteResponseObject description: >- Response object confirming deletion of an OpenAI response. - EmbeddingsRequest: - type: object - properties: - model_id: - type: string - description: >- - The identifier of the model to use. The model must be an embedding model - registered with Llama Stack and available via the /models endpoint. - contents: - oneOf: - - type: array - items: - type: string - - type: array - items: - $ref: '#/components/schemas/InterleavedContentItem' - description: >- - List of contents to generate embeddings for. Each content can be a string - or an InterleavedContentItem (and hence can be multimodal). The behavior - depends on the model and provider. Some models may only support text. 
- text_truncation: - type: string - enum: - - none - - start - - end - description: >- - (Optional) Config for how to truncate text for embedding when text is - longer than the model's max sequence length. - output_dimension: - type: integer - description: >- - (Optional) Output dimensionality for the embeddings. Only supported by - Matryoshka models. - task_type: - type: string - enum: - - query - - document - description: >- - (Optional) How is the embedding being used? This is only supported by - asymmetric embedding models. - additionalProperties: false - required: - - model_id - - contents - title: EmbeddingsRequest - EmbeddingsResponse: - type: object - properties: - embeddings: - type: array - items: - type: array - items: - type: number - description: >- - List of embedding vectors, one per input content. Each embedding is a - list of floats. The dimensionality of the embedding is model-specific; - you can check model metadata using /models/{model_id} - additionalProperties: false - required: - - embeddings - title: EmbeddingsResponse - description: >- - Response containing generated embeddings. AgentCandidate: type: object properties: diff --git a/llama_stack/apis/inference/inference.py b/llama_stack/apis/inference/inference.py index 756896796..c6a4e4f60 100644 --- a/llama_stack/apis/inference/inference.py +++ b/llama_stack/apis/inference/inference.py @@ -17,7 +17,7 @@ from typing import ( from pydantic import BaseModel, Field, field_validator from typing_extensions import TypedDict -from llama_stack.apis.common.content_types import ContentDelta, InterleavedContent, InterleavedContentItem +from llama_stack.apis.common.content_types import ContentDelta, InterleavedContent from llama_stack.apis.common.responses import Order from llama_stack.apis.models import Model from llama_stack.apis.telemetry import MetricResponseMixin @@ -1070,26 +1070,6 @@ class InferenceProvider(Protocol): """ ... - @webmethod(route="/inference/embeddings", method="POST", level=LLAMA_STACK_API_V1) - async def embeddings( - self, - model_id: str, - contents: list[str] | list[InterleavedContentItem], - text_truncation: TextTruncation | None = TextTruncation.none, - output_dimension: int | None = None, - task_type: EmbeddingTaskType | None = None, - ) -> EmbeddingsResponse: - """Generate embeddings for content pieces using the specified model. - - :param model_id: The identifier of the model to use. The model must be an embedding model registered with Llama Stack and available via the /models endpoint. - :param contents: List of contents to generate embeddings for. Each content can be a string or an InterleavedContentItem (and hence can be multimodal). The behavior depends on the model and provider. Some models may only support text. - :param output_dimension: (Optional) Output dimensionality for the embeddings. Only supported by Matryoshka models. - :param text_truncation: (Optional) Config for how to truncate text for embedding when text is longer than the model's max sequence length. - :param task_type: (Optional) How is the embedding being used? This is only supported by asymmetric embedding models. - :returns: An array of embeddings, one for each content. Each embedding is a list of floats. The dimensionality of the embedding is model-specific; you can check model metadata using /models/{model_id}. - """ - ... 
- @webmethod(route="/inference/rerank", method="POST", experimental=True, level=LLAMA_STACK_API_V1) async def rerank( self, diff --git a/llama_stack/core/routers/inference.py b/llama_stack/core/routers/inference.py index fcf01a9c4..80f47fb5d 100644 --- a/llama_stack/core/routers/inference.py +++ b/llama_stack/core/routers/inference.py @@ -16,7 +16,6 @@ from pydantic import Field, TypeAdapter from llama_stack.apis.common.content_types import ( InterleavedContent, - InterleavedContentItem, ) from llama_stack.apis.common.errors import ModelNotFoundError, ModelTypeError from llama_stack.apis.inference import ( @@ -26,8 +25,6 @@ from llama_stack.apis.inference import ( CompletionMessage, CompletionResponse, CompletionResponseStreamChunk, - EmbeddingsResponse, - EmbeddingTaskType, Inference, ListOpenAIChatCompletionResponse, LogProbConfig, @@ -48,7 +45,6 @@ from llama_stack.apis.inference import ( ResponseFormat, SamplingParams, StopReason, - TextTruncation, ToolChoice, ToolConfig, ToolDefinition, @@ -312,25 +308,6 @@ class InferenceRouter(Inference): return response - async def embeddings( - self, - model_id: str, - contents: list[str] | list[InterleavedContentItem], - text_truncation: TextTruncation | None = TextTruncation.none, - output_dimension: int | None = None, - task_type: EmbeddingTaskType | None = None, - ) -> EmbeddingsResponse: - logger.debug(f"InferenceRouter.embeddings: {model_id}") - await self._get_model(model_id, ModelType.embedding) - provider = await self.routing_table.get_provider_impl(model_id) - return await provider.embeddings( - model_id=model_id, - contents=contents, - text_truncation=text_truncation, - output_dimension=output_dimension, - task_type=task_type, - ) - async def openai_completion( self, model: str, diff --git a/llama_stack/providers/remote/inference/bedrock/bedrock.py b/llama_stack/providers/remote/inference/bedrock/bedrock.py index 29b935bbd..2206aa641 100644 --- a/llama_stack/providers/remote/inference/bedrock/bedrock.py +++ b/llama_stack/providers/remote/inference/bedrock/bedrock.py @@ -11,21 +11,17 @@ from botocore.client import BaseClient from llama_stack.apis.common.content_types import ( InterleavedContent, - InterleavedContentItem, ) from llama_stack.apis.inference import ( ChatCompletionRequest, ChatCompletionResponse, ChatCompletionResponseStreamChunk, - EmbeddingsResponse, - EmbeddingTaskType, Inference, LogProbConfig, Message, OpenAIEmbeddingsResponse, ResponseFormat, SamplingParams, - TextTruncation, ToolChoice, ToolConfig, ToolDefinition, @@ -47,8 +43,6 @@ from llama_stack.providers.utils.inference.openai_compat import ( ) from llama_stack.providers.utils.inference.prompt_adapter import ( chat_completion_request_to_prompt, - content_has_media, - interleaved_content_as_str, ) from .models import MODEL_ENTRIES @@ -218,36 +212,6 @@ class BedrockInferenceAdapter( ), } - async def embeddings( - self, - model_id: str, - contents: list[str] | list[InterleavedContentItem], - text_truncation: TextTruncation | None = TextTruncation.none, - output_dimension: int | None = None, - task_type: EmbeddingTaskType | None = None, - ) -> EmbeddingsResponse: - model = await self.model_store.get_model(model_id) - - # Convert foundation model ID to inference profile ID - region_name = self.client.meta.region_name - inference_profile_id = _to_inference_profile_id(model.provider_resource_id, region_name) - - embeddings = [] - for content in contents: - assert not content_has_media(content), "Bedrock does not support media for embeddings" - input_text = 
interleaved_content_as_str(content) - input_body = {"inputText": input_text} - body = json.dumps(input_body) - response = self.client.invoke_model( - body=body, - modelId=inference_profile_id, - accept="application/json", - contentType="application/json", - ) - response_body = json.loads(response.get("body").read()) - embeddings.append(response_body.get("embedding")) - return EmbeddingsResponse(embeddings=embeddings) - async def openai_embeddings( self, model: str, diff --git a/llama_stack/providers/remote/inference/cerebras/cerebras.py b/llama_stack/providers/remote/inference/cerebras/cerebras.py index 6662f004d..6be39fa5d 100644 --- a/llama_stack/providers/remote/inference/cerebras/cerebras.py +++ b/llama_stack/providers/remote/inference/cerebras/cerebras.py @@ -11,21 +11,17 @@ from cerebras.cloud.sdk import AsyncCerebras from llama_stack.apis.common.content_types import ( InterleavedContent, - InterleavedContentItem, ) from llama_stack.apis.inference import ( ChatCompletionRequest, CompletionRequest, CompletionResponse, - EmbeddingsResponse, - EmbeddingTaskType, Inference, LogProbConfig, Message, OpenAIEmbeddingsResponse, ResponseFormat, SamplingParams, - TextTruncation, ToolChoice, ToolConfig, ToolDefinition, @@ -187,16 +183,6 @@ class CerebrasInferenceAdapter( **get_sampling_options(request.sampling_params), } - async def embeddings( - self, - model_id: str, - contents: list[str] | list[InterleavedContentItem], - text_truncation: TextTruncation | None = TextTruncation.none, - output_dimension: int | None = None, - task_type: EmbeddingTaskType | None = None, - ) -> EmbeddingsResponse: - raise NotImplementedError() - async def openai_embeddings( self, model: str, diff --git a/llama_stack/providers/remote/inference/databricks/databricks.py b/llama_stack/providers/remote/inference/databricks/databricks.py index 6eac6e4f4..d85b477f5 100644 --- a/llama_stack/providers/remote/inference/databricks/databricks.py +++ b/llama_stack/providers/remote/inference/databricks/databricks.py @@ -11,15 +11,12 @@ from databricks.sdk import WorkspaceClient from llama_stack.apis.common.content_types import ( InterleavedContent, - InterleavedContentItem, ) from llama_stack.apis.inference import ( ChatCompletionResponse, ChatCompletionResponseStreamChunk, CompletionResponse, CompletionResponseStreamChunk, - EmbeddingsResponse, - EmbeddingTaskType, Inference, LogProbConfig, Message, @@ -27,7 +24,6 @@ from llama_stack.apis.inference import ( OpenAICompletion, ResponseFormat, SamplingParams, - TextTruncation, ToolChoice, ToolConfig, ToolDefinition, @@ -118,16 +114,6 @@ class DatabricksInferenceAdapter( ) -> ChatCompletionResponse | AsyncIterator[ChatCompletionResponseStreamChunk]: raise NotImplementedError() - async def embeddings( - self, - model_id: str, - contents: list[str] | list[InterleavedContentItem], - text_truncation: TextTruncation | None = TextTruncation.none, - output_dimension: int | None = None, - task_type: EmbeddingTaskType | None = None, - ) -> EmbeddingsResponse: - raise NotImplementedError() - async def list_models(self) -> list[Model] | None: self._model_cache = {} # from OpenAIMixin ws_client = WorkspaceClient(host=self.config.url, token=self.get_api_key()) # TODO: this is not async diff --git a/llama_stack/providers/remote/inference/fireworks/fireworks.py b/llama_stack/providers/remote/inference/fireworks/fireworks.py index 069a0a674..ed4b56fad 100644 --- a/llama_stack/providers/remote/inference/fireworks/fireworks.py +++ b/llama_stack/providers/remote/inference/fireworks/fireworks.py @@ 
-10,22 +10,18 @@ from fireworks.client import Fireworks from llama_stack.apis.common.content_types import ( InterleavedContent, - InterleavedContentItem, ) from llama_stack.apis.inference import ( ChatCompletionRequest, ChatCompletionResponse, CompletionRequest, CompletionResponse, - EmbeddingsResponse, - EmbeddingTaskType, Inference, LogProbConfig, Message, ResponseFormat, ResponseFormatType, SamplingParams, - TextTruncation, ToolChoice, ToolConfig, ToolDefinition, @@ -48,8 +44,6 @@ from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin from llama_stack.providers.utils.inference.prompt_adapter import ( chat_completion_request_to_prompt, completion_request_to_prompt, - content_has_media, - interleaved_content_as_str, request_has_media, ) @@ -259,28 +253,3 @@ class FireworksInferenceAdapter(OpenAIMixin, ModelRegistryHelper, Inference, Nee logger.debug(f"params to fireworks: {params}") return params - - async def embeddings( - self, - model_id: str, - contents: list[str] | list[InterleavedContentItem], - text_truncation: TextTruncation | None = TextTruncation.none, - output_dimension: int | None = None, - task_type: EmbeddingTaskType | None = None, - ) -> EmbeddingsResponse: - model = await self.model_store.get_model(model_id) - - kwargs = {} - if model.metadata.get("embedding_dimension"): - kwargs["dimensions"] = model.metadata.get("embedding_dimension") - assert all(not content_has_media(content) for content in contents), ( - "Fireworks does not support media for embeddings" - ) - response = self._get_client().embeddings.create( - model=model.provider_resource_id, - input=[interleaved_content_as_str(content) for content in contents], - **kwargs, - ) - - embeddings = [data.embedding for data in response.data] - return EmbeddingsResponse(embeddings=embeddings) diff --git a/llama_stack/providers/remote/inference/nvidia/nvidia.py b/llama_stack/providers/remote/inference/nvidia/nvidia.py index 92094a0f3..a31981adb 100644 --- a/llama_stack/providers/remote/inference/nvidia/nvidia.py +++ b/llama_stack/providers/remote/inference/nvidia/nvidia.py @@ -11,8 +11,6 @@ from openai import NOT_GIVEN, APIConnectionError from llama_stack.apis.common.content_types import ( InterleavedContent, - InterleavedContentItem, - TextContentItem, ) from llama_stack.apis.inference import ( ChatCompletionRequest, @@ -21,8 +19,6 @@ from llama_stack.apis.inference import ( CompletionRequest, CompletionResponse, CompletionResponseStreamChunk, - EmbeddingsResponse, - EmbeddingTaskType, Inference, LogProbConfig, Message, @@ -31,7 +27,6 @@ from llama_stack.apis.inference import ( OpenAIEmbeddingUsage, ResponseFormat, SamplingParams, - TextTruncation, ToolChoice, ToolConfig, ) @@ -156,60 +151,6 @@ class NVIDIAInferenceAdapter(OpenAIMixin, Inference): # we pass n=1 to get only one completion return convert_openai_completion_choice(response.choices[0]) - async def embeddings( - self, - model_id: str, - contents: list[str] | list[InterleavedContentItem], - text_truncation: TextTruncation | None = TextTruncation.none, - output_dimension: int | None = None, - task_type: EmbeddingTaskType | None = None, - ) -> EmbeddingsResponse: - if any(content_has_media(content) for content in contents): - raise NotImplementedError("Media is not supported") - - # - # Llama Stack: contents = list[str] | list[InterleavedContentItem] - # -> - # OpenAI: input = str | list[str] - # - # we can ignore str and always pass list[str] to OpenAI - # - flat_contents = [content.text if isinstance(content, TextContentItem) else content for 
content in contents] - input = [content.text if isinstance(content, TextContentItem) else content for content in flat_contents] - provider_model_id = await self._get_provider_model_id(model_id) - - extra_body = {} - - if text_truncation is not None: - text_truncation_options = { - TextTruncation.none: "NONE", - TextTruncation.end: "END", - TextTruncation.start: "START", - } - extra_body["truncate"] = text_truncation_options[text_truncation] - - if output_dimension is not None: - extra_body["dimensions"] = output_dimension - - if task_type is not None: - task_type_options = { - EmbeddingTaskType.document: "passage", - EmbeddingTaskType.query: "query", - } - extra_body["input_type"] = task_type_options[task_type] - - response = await self.client.embeddings.create( - model=provider_model_id, - input=input, - extra_body=extra_body, - ) - # - # OpenAI: CreateEmbeddingResponse(data=[Embedding(embedding=list[float], ...)], ...) - # -> - # Llama Stack: EmbeddingsResponse(embeddings=list[list[float]]) - # - return EmbeddingsResponse(embeddings=[embedding.embedding for embedding in response.data]) - async def openai_embeddings( self, model: str, diff --git a/llama_stack/providers/remote/inference/ollama/ollama.py b/llama_stack/providers/remote/inference/ollama/ollama.py index 3fb10445f..16b104fb5 100644 --- a/llama_stack/providers/remote/inference/ollama/ollama.py +++ b/llama_stack/providers/remote/inference/ollama/ollama.py @@ -14,7 +14,6 @@ from ollama import AsyncClient as AsyncOllamaClient from llama_stack.apis.common.content_types import ( ImageContentItem, InterleavedContent, - InterleavedContentItem, TextContentItem, ) from llama_stack.apis.common.errors import UnsupportedModelError @@ -25,8 +24,6 @@ from llama_stack.apis.inference import ( CompletionRequest, CompletionResponse, CompletionResponseStreamChunk, - EmbeddingsResponse, - EmbeddingTaskType, GrammarResponseFormat, InferenceProvider, JsonSchemaResponseFormat, @@ -34,7 +31,6 @@ from llama_stack.apis.inference import ( Message, ResponseFormat, SamplingParams, - TextTruncation, ToolChoice, ToolConfig, ToolDefinition, @@ -66,9 +62,7 @@ from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin from llama_stack.providers.utils.inference.prompt_adapter import ( chat_completion_request_to_prompt, completion_request_to_prompt, - content_has_media, convert_image_content_to_url, - interleaved_content_as_str, request_has_media, ) @@ -363,27 +357,6 @@ class OllamaInferenceAdapter( async for chunk in process_chat_completion_stream_response(stream, request): yield chunk - async def embeddings( - self, - model_id: str, - contents: list[str] | list[InterleavedContentItem], - text_truncation: TextTruncation | None = TextTruncation.none, - output_dimension: int | None = None, - task_type: EmbeddingTaskType | None = None, - ) -> EmbeddingsResponse: - model = await self._get_model(model_id) - - assert all(not content_has_media(content) for content in contents), ( - "Ollama does not support media for embeddings" - ) - response = await self.ollama_client.embed( - model=model.provider_resource_id, - input=[interleaved_content_as_str(content) for content in contents], - ) - embeddings = response["embeddings"] - - return EmbeddingsResponse(embeddings=embeddings) - async def register_model(self, model: Model) -> Model: if await self.check_model_availability(model.provider_model_id): return model diff --git a/llama_stack/providers/remote/inference/passthrough/passthrough.py b/llama_stack/providers/remote/inference/passthrough/passthrough.py 
index a2bdf0369..ae482b7b0 100644 --- a/llama_stack/providers/remote/inference/passthrough/passthrough.py +++ b/llama_stack/providers/remote/inference/passthrough/passthrough.py @@ -14,8 +14,6 @@ from llama_stack.apis.inference import ( ChatCompletionResponse, ChatCompletionResponseStreamChunk, CompletionMessage, - EmbeddingsResponse, - EmbeddingTaskType, Inference, LogProbConfig, Message, @@ -27,7 +25,6 @@ from llama_stack.apis.inference import ( OpenAIResponseFormatParam, ResponseFormat, SamplingParams, - TextTruncation, ToolChoice, ToolConfig, ToolDefinition, @@ -190,25 +187,6 @@ class PassthroughInferenceAdapter(Inference): chunk = convert_to_pydantic(ChatCompletionResponseStreamChunk, chunk) yield chunk - async def embeddings( - self, - model_id: str, - contents: list[InterleavedContent], - text_truncation: TextTruncation | None = TextTruncation.none, - output_dimension: int | None = None, - task_type: EmbeddingTaskType | None = None, - ) -> EmbeddingsResponse: - client = self._get_client() - model = await self.model_store.get_model(model_id) - - return await client.inference.embeddings( - model_id=model.provider_resource_id, - contents=contents, - text_truncation=text_truncation, - output_dimension=output_dimension, - task_type=task_type, - ) - async def openai_embeddings( self, model: str, diff --git a/llama_stack/providers/remote/inference/runpod/runpod.py b/llama_stack/providers/remote/inference/runpod/runpod.py index ff2fe6401..82252b04d 100644 --- a/llama_stack/providers/remote/inference/runpod/runpod.py +++ b/llama_stack/providers/remote/inference/runpod/runpod.py @@ -136,16 +136,6 @@ class RunpodInferenceAdapter( **get_sampling_options(request.sampling_params), } - async def embeddings( - self, - model: str, - contents: list[str] | list[InterleavedContentItem], - text_truncation: TextTruncation | None = TextTruncation.none, - output_dimension: int | None = None, - task_type: EmbeddingTaskType | None = None, - ) -> EmbeddingsResponse: - raise NotImplementedError() - async def openai_embeddings( self, model: str, diff --git a/llama_stack/providers/remote/inference/tgi/tgi.py b/llama_stack/providers/remote/inference/tgi/tgi.py index 27597900f..e1632e4a0 100644 --- a/llama_stack/providers/remote/inference/tgi/tgi.py +++ b/llama_stack/providers/remote/inference/tgi/tgi.py @@ -12,14 +12,11 @@ from pydantic import SecretStr from llama_stack.apis.common.content_types import ( InterleavedContent, - InterleavedContentItem, ) from llama_stack.apis.inference import ( ChatCompletionRequest, ChatCompletionResponse, CompletionRequest, - EmbeddingsResponse, - EmbeddingTaskType, Inference, LogProbConfig, Message, @@ -27,7 +24,6 @@ from llama_stack.apis.inference import ( ResponseFormat, ResponseFormatType, SamplingParams, - TextTruncation, ToolChoice, ToolConfig, ToolDefinition, @@ -306,16 +302,6 @@ class _HfAdapter( **self._build_options(request.sampling_params, request.response_format), ) - async def embeddings( - self, - model_id: str, - contents: list[str] | list[InterleavedContentItem], - text_truncation: TextTruncation | None = TextTruncation.none, - output_dimension: int | None = None, - task_type: EmbeddingTaskType | None = None, - ) -> EmbeddingsResponse: - raise NotImplementedError() - async def openai_embeddings( self, model: str, diff --git a/llama_stack/providers/remote/inference/together/together.py b/llama_stack/providers/remote/inference/together/together.py index c199677be..083c528bb 100644 --- a/llama_stack/providers/remote/inference/together/together.py +++ 
b/llama_stack/providers/remote/inference/together/together.py @@ -12,14 +12,11 @@ from together.constants import BASE_URL from llama_stack.apis.common.content_types import ( InterleavedContent, - InterleavedContentItem, ) from llama_stack.apis.inference import ( ChatCompletionRequest, ChatCompletionResponse, CompletionRequest, - EmbeddingsResponse, - EmbeddingTaskType, Inference, LogProbConfig, Message, @@ -27,7 +24,6 @@ from llama_stack.apis.inference import ( ResponseFormat, ResponseFormatType, SamplingParams, - TextTruncation, ToolChoice, ToolConfig, ToolDefinition, @@ -50,8 +46,6 @@ from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin from llama_stack.providers.utils.inference.prompt_adapter import ( chat_completion_request_to_prompt, completion_request_to_prompt, - content_has_media, - interleaved_content_as_str, request_has_media, ) @@ -247,26 +241,6 @@ class TogetherInferenceAdapter(OpenAIMixin, ModelRegistryHelper, Inference, Need logger.debug(f"params to together: {params}") return params - async def embeddings( - self, - model_id: str, - contents: list[str] | list[InterleavedContentItem], - text_truncation: TextTruncation | None = TextTruncation.none, - output_dimension: int | None = None, - task_type: EmbeddingTaskType | None = None, - ) -> EmbeddingsResponse: - model = await self.model_store.get_model(model_id) - assert all(not content_has_media(content) for content in contents), ( - "Together does not support media for embeddings" - ) - client = self._get_client() - r = await client.embeddings.create( - model=model.provider_resource_id, - input=[interleaved_content_as_str(content) for content in contents], - ) - embeddings = [item.embedding for item in r.data] - return EmbeddingsResponse(embeddings=embeddings) - async def list_models(self) -> list[Model] | None: self._model_cache = {} # Together's /v1/models is not compatible with OpenAI's /v1/models. 
Together support ticket #13355 -> will not fix, use Together's own client diff --git a/llama_stack/providers/remote/inference/vllm/vllm.py b/llama_stack/providers/remote/inference/vllm/vllm.py index 8fbb4b815..bef5cbf2c 100644 --- a/llama_stack/providers/remote/inference/vllm/vllm.py +++ b/llama_stack/providers/remote/inference/vllm/vllm.py @@ -16,7 +16,6 @@ from openai.types.chat.chat_completion_chunk import ( from llama_stack.apis.common.content_types import ( InterleavedContent, - InterleavedContentItem, TextDelta, ToolCallDelta, ToolCallParseStatus, @@ -31,8 +30,6 @@ from llama_stack.apis.inference import ( CompletionRequest, CompletionResponse, CompletionResponseStreamChunk, - EmbeddingsResponse, - EmbeddingTaskType, GrammarResponseFormat, Inference, JsonSchemaResponseFormat, @@ -41,7 +38,6 @@ from llama_stack.apis.inference import ( ModelStore, ResponseFormat, SamplingParams, - TextTruncation, ToolChoice, ToolConfig, ToolDefinition, @@ -74,8 +70,6 @@ from llama_stack.providers.utils.inference.openai_compat import ( from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin from llama_stack.providers.utils.inference.prompt_adapter import ( completion_request_to_prompt, - content_has_media, - interleaved_content_as_str, request_has_media, ) @@ -550,27 +544,3 @@ class VLLMInferenceAdapter(OpenAIMixin, LiteLLMOpenAIMixin, Inference, ModelsPro "stream": request.stream, **options, } - - async def embeddings( - self, - model_id: str, - contents: list[str] | list[InterleavedContentItem], - text_truncation: TextTruncation | None = TextTruncation.none, - output_dimension: int | None = None, - task_type: EmbeddingTaskType | None = None, - ) -> EmbeddingsResponse: - model = await self._get_model(model_id) - - kwargs = {} - assert model.model_type == ModelType.embedding - assert model.metadata.get("embedding_dimension") - kwargs["dimensions"] = model.metadata.get("embedding_dimension") - assert all(not content_has_media(content) for content in contents), "VLLM does not support media for embeddings" - response = await self.client.embeddings.create( - model=model.provider_resource_id, - input=[interleaved_content_as_str(content) for content in contents], - **kwargs, - ) - - embeddings = [data.embedding for data in response.data] - return EmbeddingsResponse(embeddings=embeddings) diff --git a/llama_stack/providers/remote/inference/watsonx/watsonx.py b/llama_stack/providers/remote/inference/watsonx/watsonx.py index cb8b45565..00b9acc06 100644 --- a/llama_stack/providers/remote/inference/watsonx/watsonx.py +++ b/llama_stack/providers/remote/inference/watsonx/watsonx.py @@ -11,13 +11,11 @@ from ibm_watsonx_ai.foundation_models import Model from ibm_watsonx_ai.metanames import GenTextParamsMetaNames as GenParams from openai import AsyncOpenAI -from llama_stack.apis.common.content_types import InterleavedContent, InterleavedContentItem +from llama_stack.apis.common.content_types import InterleavedContent from llama_stack.apis.inference import ( ChatCompletionRequest, ChatCompletionResponse, CompletionRequest, - EmbeddingsResponse, - EmbeddingTaskType, GreedySamplingStrategy, Inference, LogProbConfig, @@ -30,7 +28,6 @@ from llama_stack.apis.inference import ( OpenAIResponseFormatParam, ResponseFormat, SamplingParams, - TextTruncation, ToolChoice, ToolConfig, ToolDefinition, @@ -265,16 +262,6 @@ class WatsonXInferenceAdapter(Inference, ModelRegistryHelper): } return params - async def embeddings( - self, - model_id: str, - contents: list[str] | list[InterleavedContentItem], - 
text_truncation: TextTruncation | None = TextTruncation.none, - output_dimension: int | None = None, - task_type: EmbeddingTaskType | None = None, - ) -> EmbeddingsResponse: - raise NotImplementedError("embedding is not supported for watsonx") - async def openai_embeddings( self, model: str, diff --git a/llama_stack/providers/utils/inference/embedding_mixin.py b/llama_stack/providers/utils/inference/embedding_mixin.py index 9bd0aa8ce..facc59f65 100644 --- a/llama_stack/providers/utils/inference/embedding_mixin.py +++ b/llama_stack/providers/utils/inference/embedding_mixin.py @@ -15,16 +15,11 @@ if TYPE_CHECKING: from sentence_transformers import SentenceTransformer from llama_stack.apis.inference import ( - EmbeddingsResponse, - EmbeddingTaskType, - InterleavedContentItem, ModelStore, OpenAIEmbeddingData, OpenAIEmbeddingsResponse, OpenAIEmbeddingUsage, - TextTruncation, ) -from llama_stack.providers.utils.inference.prompt_adapter import interleaved_content_as_str EMBEDDING_MODELS = {} @@ -35,23 +30,6 @@ log = get_logger(name=__name__, category="providers::utils") class SentenceTransformerEmbeddingMixin: model_store: ModelStore - async def embeddings( - self, - model_id: str, - contents: list[str] | list[InterleavedContentItem], - text_truncation: TextTruncation | None = TextTruncation.none, - output_dimension: int | None = None, - task_type: EmbeddingTaskType | None = None, - ) -> EmbeddingsResponse: - model = await self.model_store.get_model(model_id) - embedding_model = await self._load_sentence_transformer_model(model.provider_resource_id) - embeddings = await asyncio.to_thread( - embedding_model.encode, - [interleaved_content_as_str(content) for content in contents], - show_progress_bar=False, - ) - return EmbeddingsResponse(embeddings=embeddings) - async def openai_embeddings( self, model: str, diff --git a/llama_stack/providers/utils/inference/litellm_openai_mixin.py b/llama_stack/providers/utils/inference/litellm_openai_mixin.py index b1e38f323..966081e9f 100644 --- a/llama_stack/providers/utils/inference/litellm_openai_mixin.py +++ b/llama_stack/providers/utils/inference/litellm_openai_mixin.py @@ -11,14 +11,11 @@ import litellm from llama_stack.apis.common.content_types import ( InterleavedContent, - InterleavedContentItem, ) from llama_stack.apis.inference import ( ChatCompletionRequest, ChatCompletionResponse, ChatCompletionResponseStreamChunk, - EmbeddingsResponse, - EmbeddingTaskType, InferenceProvider, JsonSchemaResponseFormat, LogProbConfig, @@ -32,7 +29,6 @@ from llama_stack.apis.inference import ( OpenAIResponseFormatParam, ResponseFormat, SamplingParams, - TextTruncation, ToolChoice, ToolConfig, ToolDefinition, @@ -50,9 +46,6 @@ from llama_stack.providers.utils.inference.openai_compat import ( get_sampling_options, prepare_openai_completion_params, ) -from llama_stack.providers.utils.inference.prompt_adapter import ( - interleaved_content_as_str, -) logger = get_logger(name=__name__, category="providers::utils") @@ -269,24 +262,6 @@ class LiteLLMOpenAIMixin( ) return api_key - async def embeddings( - self, - model_id: str, - contents: list[str] | list[InterleavedContentItem], - text_truncation: TextTruncation | None = TextTruncation.none, - output_dimension: int | None = None, - task_type: EmbeddingTaskType | None = None, - ) -> EmbeddingsResponse: - model = await self.model_store.get_model(model_id) - - response = litellm.embedding( - model=self.get_litellm_model_name(model.provider_resource_id), - input=[interleaved_content_as_str(content) for content in contents], - ) 
- - embeddings = [data["embedding"] for data in response["data"]] - return EmbeddingsResponse(embeddings=embeddings) - async def openai_embeddings( self, model: str, diff --git a/tests/unit/providers/vector_io/test_faiss.py b/tests/unit/providers/vector_io/test_faiss.py index 90108d7a0..9ee5c82f4 100644 --- a/tests/unit/providers/vector_io/test_faiss.py +++ b/tests/unit/providers/vector_io/test_faiss.py @@ -5,13 +5,12 @@ # the root directory of this source tree. import asyncio -from unittest.mock import AsyncMock, MagicMock, patch +from unittest.mock import MagicMock, patch import numpy as np import pytest from llama_stack.apis.files import Files -from llama_stack.apis.inference import EmbeddingsResponse, Inference from llama_stack.apis.vector_dbs import VectorDB from llama_stack.apis.vector_io import Chunk, QueryChunksResponse from llama_stack.providers.datatypes import HealthStatus @@ -70,13 +69,6 @@ def mock_vector_db(vector_db_id, embedding_dimension) -> MagicMock: return mock_vector_db -@pytest.fixture -def mock_inference_api(sample_embeddings): - mock_api = MagicMock(spec=Inference) - mock_api.embeddings = AsyncMock(return_value=EmbeddingsResponse(embeddings=sample_embeddings)) - return mock_api - - @pytest.fixture def mock_files_api(): mock_api = MagicMock(spec=Files) @@ -96,22 +88,6 @@ async def faiss_index(embedding_dimension): yield index -@pytest.fixture -async def faiss_adapter(faiss_config, mock_inference_api, mock_files_api) -> FaissVectorIOAdapter: - # Create the adapter - adapter = FaissVectorIOAdapter(config=faiss_config, inference_api=mock_inference_api, files_api=mock_files_api) - - # Create a mock KVStore - mock_kvstore = MagicMock() - mock_kvstore.values_in_range = AsyncMock(return_value=[]) - - # Patch the initialize method to avoid the kvstore_impl call - with patch.object(FaissVectorIOAdapter, "initialize"): - # Set the kvstore directly - adapter.kvstore = mock_kvstore - yield adapter - - async def test_faiss_query_vector_returns_infinity_when_query_and_embedding_are_identical( faiss_index, sample_chunks, sample_embeddings, embedding_dimension ): From aac42ddcc2832133873ba1b7cd1d74996e21564a Mon Sep 17 00:00:00 2001 From: Charlie Doern Date: Mon, 29 Sep 2025 15:42:09 -0400 Subject: [PATCH 02/13] feat(api): level inference/rerank and remove experimental (#3565) # What does this PR do? inference/rerank is the one route in the API intended to not be deprecated. Level it as v1alpha. 
Additionally, remove `experimental` and opt to instead use `v1alpha` which itself implies an experimental state based on the original proposal Signed-off-by: Charlie Doern --- docs/static/llama-stack-spec.html | 2 +- docs/static/llama-stack-spec.yaml | 2 +- llama_stack/apis/inference/inference.py | 4 ++-- llama_stack/core/resolver.py | 11 +++++++++-- llama_stack/schema_utils.py | 4 ---- 5 files changed, 13 insertions(+), 10 deletions(-) diff --git a/docs/static/llama-stack-spec.html b/docs/static/llama-stack-spec.html index 7845fb068..32ead1764 100644 --- a/docs/static/llama-stack-spec.html +++ b/docs/static/llama-stack-spec.html @@ -5431,7 +5431,7 @@ } } }, - "/v1/inference/rerank": { + "/v1alpha/inference/rerank": { "post": { "responses": { "200": { diff --git a/docs/static/llama-stack-spec.yaml b/docs/static/llama-stack-spec.yaml index 8cbbccaa2..3b5b92060 100644 --- a/docs/static/llama-stack-spec.yaml +++ b/docs/static/llama-stack-spec.yaml @@ -3895,7 +3895,7 @@ paths: schema: $ref: '#/components/schemas/QueryTracesRequest' required: true - /v1/inference/rerank: + /v1alpha/inference/rerank: post: responses: '200': diff --git a/llama_stack/apis/inference/inference.py b/llama_stack/apis/inference/inference.py index c6a4e4f60..134da5bf8 100644 --- a/llama_stack/apis/inference/inference.py +++ b/llama_stack/apis/inference/inference.py @@ -21,7 +21,7 @@ from llama_stack.apis.common.content_types import ContentDelta, InterleavedConte from llama_stack.apis.common.responses import Order from llama_stack.apis.models import Model from llama_stack.apis.telemetry import MetricResponseMixin -from llama_stack.apis.version import LLAMA_STACK_API_V1 +from llama_stack.apis.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1ALPHA from llama_stack.models.llama.datatypes import ( BuiltinTool, StopReason, @@ -1070,7 +1070,7 @@ class InferenceProvider(Protocol): """ ... 
- @webmethod(route="/inference/rerank", method="POST", experimental=True, level=LLAMA_STACK_API_V1) + @webmethod(route="/inference/rerank", method="POST", level=LLAMA_STACK_API_V1ALPHA) async def rerank( self, model: str, diff --git a/llama_stack/core/resolver.py b/llama_stack/core/resolver.py index 373446de6..f421c47ed 100644 --- a/llama_stack/core/resolver.py +++ b/llama_stack/core/resolver.py @@ -29,6 +29,7 @@ from llama_stack.apis.telemetry import Telemetry from llama_stack.apis.tools import ToolGroups, ToolRuntime from llama_stack.apis.vector_dbs import VectorDBs from llama_stack.apis.vector_io import VectorIO +from llama_stack.apis.version import LLAMA_STACK_API_V1ALPHA from llama_stack.core.client import get_client_impl from llama_stack.core.datatypes import ( AccessRule, @@ -412,8 +413,14 @@ def check_protocol_compliance(obj: Any, protocol: Any) -> None: mro = type(obj).__mro__ for name, value in inspect.getmembers(protocol): - if inspect.isfunction(value) and hasattr(value, "__webmethod__"): - if value.__webmethod__.experimental: + if inspect.isfunction(value) and hasattr(value, "__webmethods__"): + has_alpha_api = False + for webmethod in value.__webmethods__: + if webmethod.level == LLAMA_STACK_API_V1ALPHA: + has_alpha_api = True + break + # if this API has multiple webmethods, and one of them is an alpha API, this API should be skipped when checking for missing or not callable routes + if has_alpha_api: continue if not hasattr(obj, name): missing_methods.append((name, "missing")) diff --git a/llama_stack/schema_utils.py b/llama_stack/schema_utils.py index 4f8b4edff..c58fcdd01 100644 --- a/llama_stack/schema_utils.py +++ b/llama_stack/schema_utils.py @@ -22,7 +22,6 @@ class WebMethod: raw_bytes_request_body: bool | None = False # A descriptive name of the corresponding span created by tracing descriptive_name: str | None = None - experimental: bool | None = False required_scope: str | None = None deprecated: bool | None = False @@ -39,7 +38,6 @@ def webmethod( response_examples: list[Any] | None = None, raw_bytes_request_body: bool | None = False, descriptive_name: str | None = None, - experimental: bool | None = False, required_scope: str | None = None, deprecated: bool | None = False, ) -> Callable[[T], T]: @@ -50,7 +48,6 @@ def webmethod( :param public: True if the operation can be invoked without prior authentication. :param request_examples: Sample requests that the operation might take. Pass a list of objects, not JSON. :param response_examples: Sample responses that the operation might produce. Pass a list of objects, not JSON. - :param experimental: True if the operation is experimental and subject to change. :param required_scope: Required scope for this endpoint (e.g., 'monitoring.viewer'). """ @@ -64,7 +61,6 @@ def webmethod( response_examples=response_examples, raw_bytes_request_body=raw_bytes_request_body, descriptive_name=descriptive_name, - experimental=experimental, required_scope=required_scope, deprecated=deprecated, ) From 45f438c027176c783a1068894b9147eb008ffa5e Mon Sep 17 00:00:00 2001 From: Matthew Farrellee Date: Mon, 29 Sep 2025 16:11:37 -0400 Subject: [PATCH 03/13] chore: skip safety tests when shield not available (#3592) # What does this PR do? we skip embedding tests when the embedding_model_id isn't provided. same for completion / chat tests when text_model_id isn't given. instead of failing safety tests when a shield_id isn't provided, we'll skip them too. 
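For context, the skip behavior comes from the autouse `skip_if_no_model` fixture touched in the diff below. The hunk only shows the fixture-list change, so this is a minimal sketch of the surrounding pattern; the skip condition and message are assumptions filled in for illustration, not copied from the repo:

```python
import inspect

import pytest


@pytest.fixture(autouse=True)
def skip_if_no_model(request):
    # Fixtures whose absence should skip a test rather than fail it.
    model_fixtures = ["text_model_id", "vision_model_id", "embedding_model_id", "judge_model_id", "shield_id"]
    # Only consider fixtures the test function actually declares.
    actual_params = inspect.signature(request.node.function).parameters.keys()
    for fixture in model_fixtures:
        if fixture in actual_params and request.getfixturevalue(fixture) is None:
            pytest.skip(f"{fixture} not provided - skipping test")
```

With this in place, a safety test that declares `shield_id` is skipped, rather than failed, when no shield is configured.
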
## Test Plan ci Co-authored-by: Ashwin Bharambe --- tests/integration/fixtures/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/fixtures/common.py b/tests/integration/fixtures/common.py index ee4c5755a..835a701bc 100644 --- a/tests/integration/fixtures/common.py +++ b/tests/integration/fixtures/common.py @@ -166,7 +166,7 @@ def model_providers(llama_stack_client): @pytest.fixture(autouse=True) def skip_if_no_model(request): - model_fixtures = ["text_model_id", "vision_model_id", "embedding_model_id", "judge_model_id"] + model_fixtures = ["text_model_id", "vision_model_id", "embedding_model_id", "judge_model_id", "shield_id"] test_func = request.node.function actual_params = inspect.signature(test_func).parameters.keys() From 7c888fc0daa97fd550bb7cdc35f8fa1e3648c443 Mon Sep 17 00:00:00 2001 From: Matthew Farrellee Date: Mon, 29 Sep 2025 16:13:53 -0400 Subject: [PATCH 04/13] feat: update eval runner to use openai endpoints (#3588) # What does this PR do? move the eval=inline::meta-reference implementation to use openai_completion/openai_chat_completion note: this breaks backward compatibility if eval setup used sampling params' repetition_penalty or strategy ## Test Plan ci w/ new recordings Co-authored-by: Ashwin Bharambe --- .../inline/eval/meta_reference/eval.py | 31 ++++++---- .../recordings/responses/8d035e153b6f.json | 56 +++++++++++++++++++ .../recordings/responses/92a9a916ef02.json | 56 +++++++++++++++++++ .../recordings/responses/c62eb5d7115e.json | 56 +++++++++++++++++++ .../recordings/responses/e25ab43491af.json | 56 +++++++++++++++++++ .../recordings/responses/f28a44c97ea7.json | 56 +++++++++++++++++++ 6 files changed, 300 insertions(+), 11 deletions(-) create mode 100644 tests/integration/recordings/responses/8d035e153b6f.json create mode 100644 tests/integration/recordings/responses/92a9a916ef02.json create mode 100644 tests/integration/recordings/responses/c62eb5d7115e.json create mode 100644 tests/integration/recordings/responses/e25ab43491af.json create mode 100644 tests/integration/recordings/responses/f28a44c97ea7.json diff --git a/llama_stack/providers/inline/eval/meta_reference/eval.py b/llama_stack/providers/inline/eval/meta_reference/eval.py index a03e8951c..0dfe23dca 100644 --- a/llama_stack/providers/inline/eval/meta_reference/eval.py +++ b/llama_stack/providers/inline/eval/meta_reference/eval.py @@ -12,7 +12,7 @@ from llama_stack.apis.agents import Agents, StepType from llama_stack.apis.benchmarks import Benchmark from llama_stack.apis.datasetio import DatasetIO from llama_stack.apis.datasets import Datasets -from llama_stack.apis.inference import Inference, SystemMessage, UserMessage +from llama_stack.apis.inference import Inference, OpenAISystemMessageParam, OpenAIUserMessageParam, UserMessage from llama_stack.apis.scoring import Scoring from llama_stack.providers.datatypes import BenchmarksProtocolPrivate from llama_stack.providers.inline.agents.meta_reference.agent_instance import ( @@ -159,31 +159,40 @@ class MetaReferenceEvalImpl( ) -> list[dict[str, Any]]: candidate = benchmark_config.eval_candidate assert candidate.sampling_params.max_tokens is not None, "SamplingParams.max_tokens must be provided" + sampling_params = {"max_tokens": candidate.sampling_params.max_tokens} generations = [] for x in tqdm(input_rows): if ColumnName.completion_input.value in x: + if candidate.sampling_params.stop: + sampling_params["stop"] = candidate.sampling_params.stop + input_content = 
json.loads(x[ColumnName.completion_input.value]) - response = await self.inference_api.completion( + response = await self.inference_api.openai_completion( model=candidate.model, - content=input_content, - sampling_params=candidate.sampling_params, + prompt=input_content, + **sampling_params, ) - generations.append({ColumnName.generated_answer.value: response.completion_message.content}) + generations.append({ColumnName.generated_answer.value: response.choices[0].text}) elif ColumnName.chat_completion_input.value in x: chat_completion_input_json = json.loads(x[ColumnName.chat_completion_input.value]) - input_messages = [UserMessage(**x) for x in chat_completion_input_json if x["role"] == "user"] + input_messages = [ + OpenAIUserMessageParam(**x) for x in chat_completion_input_json if x["role"] == "user" + ] + messages = [] if candidate.system_message: messages.append(candidate.system_message) - messages += [SystemMessage(**x) for x in chat_completion_input_json if x["role"] == "system"] + + messages += [OpenAISystemMessageParam(**x) for x in chat_completion_input_json if x["role"] == "system"] + messages += input_messages - response = await self.inference_api.chat_completion( - model_id=candidate.model, + response = await self.inference_api.openai_chat_completion( + model=candidate.model, messages=messages, - sampling_params=candidate.sampling_params, + **sampling_params, ) - generations.append({ColumnName.generated_answer.value: response.completion_message.content}) + generations.append({ColumnName.generated_answer.value: response.choices[0].message.content}) else: raise ValueError("Invalid input row") diff --git a/tests/integration/recordings/responses/8d035e153b6f.json b/tests/integration/recordings/responses/8d035e153b6f.json new file mode 100644 index 000000000..18f3ee3cd --- /dev/null +++ b/tests/integration/recordings/responses/8d035e153b6f.json @@ -0,0 +1,56 @@ +{ + "request": { + "method": "POST", + "url": "http://0.0.0.0:11434/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "llama3.2:3b-instruct-fp16", + "messages": [ + { + "role": "user", + "content": "Who is the CEO of Meta?" 
+ } + ], + "max_tokens": 0 + }, + "endpoint": "/v1/chat/completions", + "model": "llama3.2:3b-instruct-fp16" + }, + "response": { + "body": { + "__type__": "openai.types.chat.chat_completion.ChatCompletion", + "__data__": { + "id": "chatcmpl-708", + "choices": [ + { + "finish_reason": "stop", + "index": 0, + "logprobs": null, + "message": { + "content": "Mark Zuckerberg is the founder, chairman and CEO of Meta, which he originally founded as Facebook in 2004.", + "refusal": null, + "role": "assistant", + "annotations": null, + "audio": null, + "function_call": null, + "tool_calls": null + } + } + ], + "created": 1759012142, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": { + "completion_tokens": 24, + "prompt_tokens": 32, + "total_tokens": 56, + "completion_tokens_details": null, + "prompt_tokens_details": null + } + } + }, + "is_streaming": false + } +} diff --git a/tests/integration/recordings/responses/92a9a916ef02.json b/tests/integration/recordings/responses/92a9a916ef02.json new file mode 100644 index 000000000..5fe294826 --- /dev/null +++ b/tests/integration/recordings/responses/92a9a916ef02.json @@ -0,0 +1,56 @@ +{ + "request": { + "method": "POST", + "url": "http://0.0.0.0:11434/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "llama3.2:3b-instruct-fp16", + "messages": [ + { + "role": "user", + "content": "What is the currency of Japan?" + } + ], + "max_tokens": 0 + }, + "endpoint": "/v1/chat/completions", + "model": "llama3.2:3b-instruct-fp16" + }, + "response": { + "body": { + "__type__": "openai.types.chat.chat_completion.ChatCompletion", + "__data__": { + "id": "chatcmpl-343", + "choices": [ + { + "finish_reason": "stop", + "index": 0, + "logprobs": null, + "message": { + "content": "The currency of Japan is the Japanese yen (, ry\u014d) and its symbol, \u00a5.", + "refusal": null, + "role": "assistant", + "annotations": null, + "audio": null, + "function_call": null, + "tool_calls": null + } + } + ], + "created": 1759012146, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": { + "completion_tokens": 20, + "prompt_tokens": 32, + "total_tokens": 52, + "completion_tokens_details": null, + "prompt_tokens_details": null + } + } + }, + "is_streaming": false + } +} diff --git a/tests/integration/recordings/responses/c62eb5d7115e.json b/tests/integration/recordings/responses/c62eb5d7115e.json new file mode 100644 index 000000000..fa872ac44 --- /dev/null +++ b/tests/integration/recordings/responses/c62eb5d7115e.json @@ -0,0 +1,56 @@ +{ + "request": { + "method": "POST", + "url": "http://0.0.0.0:11434/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "llama3.2:3b-instruct-fp16", + "messages": [ + { + "role": "user", + "content": "What is the smallest country in the world?" + } + ], + "max_tokens": 0 + }, + "endpoint": "/v1/chat/completions", + "model": "llama3.2:3b-instruct-fp16" + }, + "response": { + "body": { + "__type__": "openai.types.chat.chat_completion.ChatCompletion", + "__data__": { + "id": "chatcmpl-842", + "choices": [ + { + "finish_reason": "stop", + "index": 0, + "logprobs": null, + "message": { + "content": "The smallest country in the world is the Vatican City, an independent city-state located within Rome, Italy. 
It has a total area of approximately 0.44 km\u00b2 (0.17 sq mi) and a population of around 800 people.\n\nDespite its tiny size, the Vatican City is a sovereign state with its own government, currency, postal system, and even a small army (the Gendarmeria Romana). It's also home to numerous iconic landmarks, including St. Peter's Basilica, the Sistine Chapel, and the Vatican Museums.\n\nThe Vatican City is so small that it can fit entirely within an average American city park!", + "refusal": null, + "role": "assistant", + "annotations": null, + "audio": null, + "function_call": null, + "tool_calls": null + } + } + ], + "created": 1759012145, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": { + "completion_tokens": 133, + "prompt_tokens": 34, + "total_tokens": 167, + "completion_tokens_details": null, + "prompt_tokens_details": null + } + } + }, + "is_streaming": false + } +} diff --git a/tests/integration/recordings/responses/e25ab43491af.json b/tests/integration/recordings/responses/e25ab43491af.json new file mode 100644 index 000000000..9fb331942 --- /dev/null +++ b/tests/integration/recordings/responses/e25ab43491af.json @@ -0,0 +1,56 @@ +{ + "request": { + "method": "POST", + "url": "http://0.0.0.0:11434/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "llama3.2:3b-instruct-fp16", + "messages": [ + { + "role": "user", + "content": "What is the capital of France?" + } + ], + "max_tokens": 0 + }, + "endpoint": "/v1/chat/completions", + "model": "llama3.2:3b-instruct-fp16" + }, + "response": { + "body": { + "__type__": "openai.types.chat.chat_completion.ChatCompletion", + "__data__": { + "id": "chatcmpl-808", + "choices": [ + { + "finish_reason": "stop", + "index": 0, + "logprobs": null, + "message": { + "content": "The capital of France is Paris.", + "refusal": null, + "role": "assistant", + "annotations": null, + "audio": null, + "function_call": null, + "tool_calls": null + } + } + ], + "created": 1759012142, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": { + "completion_tokens": 8, + "prompt_tokens": 32, + "total_tokens": 40, + "completion_tokens_details": null, + "prompt_tokens_details": null + } + } + }, + "is_streaming": false + } +} diff --git a/tests/integration/recordings/responses/f28a44c97ea7.json b/tests/integration/recordings/responses/f28a44c97ea7.json new file mode 100644 index 000000000..d50851dfd --- /dev/null +++ b/tests/integration/recordings/responses/f28a44c97ea7.json @@ -0,0 +1,56 @@ +{ + "request": { + "method": "POST", + "url": "http://0.0.0.0:11434/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "llama3.2:3b-instruct-fp16", + "messages": [ + { + "role": "user", + "content": "What is the largest planet in our solar system?" + } + ], + "max_tokens": 0 + }, + "endpoint": "/v1/chat/completions", + "model": "llama3.2:3b-instruct-fp16" + }, + "response": { + "body": { + "__type__": "openai.types.chat.chat_completion.ChatCompletion", + "__data__": { + "id": "chatcmpl-282", + "choices": [ + { + "finish_reason": "stop", + "index": 0, + "logprobs": null, + "message": { + "content": "The largest planet in our solar system is Jupiter. It is a gas giant, with a diameter of approximately 142,984 kilometers (88,846 miles). 
This makes it more than 11 times the diameter of the Earth and more than 2.5 times the mass of all the other planets in our solar system combined.", + "refusal": null, + "role": "assistant", + "annotations": null, + "audio": null, + "function_call": null, + "tool_calls": null + } + } + ], + "created": 1759012143, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": { + "completion_tokens": 67, + "prompt_tokens": 35, + "total_tokens": 102, + "completion_tokens_details": null, + "prompt_tokens_details": null + } + } + }, + "is_streaming": false + } +} From 498be131a182ff9d6eddd8ebaa8869d3d4ceb123 Mon Sep 17 00:00:00 2001 From: Alexey Rybak <50731695+reluctantfuturist@users.noreply.github.com> Date: Mon, 29 Sep 2025 13:14:05 -0700 Subject: [PATCH 05/13] docs: update image paths (#3599) # What does this PR do? * Updates image paths for images in docs/resources/ to proper static image locations ## Test Plan * `npm run build` builds documentation properly --- docs/docs/references/llama_cli_reference/index.md | 2 +- docs/getting_started.ipynb | 2 +- docs/{resources => static/img}/agentic-system.png | Bin docs/{resources => static/img}/list-templates.png | Bin docs/{resources => static/img}/llama-stack.png | Bin docs/{resources => static/img}/model-lifecycle.png | Bin docs/{resources => static/img}/prompt-format.png | Bin 7 files changed, 2 insertions(+), 2 deletions(-) rename docs/{resources => static/img}/agentic-system.png (100%) rename docs/{resources => static/img}/list-templates.png (100%) rename docs/{resources => static/img}/llama-stack.png (100%) rename docs/{resources => static/img}/model-lifecycle.png (100%) rename docs/{resources => static/img}/prompt-format.png (100%) diff --git a/docs/docs/references/llama_cli_reference/index.md b/docs/docs/references/llama_cli_reference/index.md index fe3aa51ab..9b71a6795 100644 --- a/docs/docs/references/llama_cli_reference/index.md +++ b/docs/docs/references/llama_cli_reference/index.md @@ -261,7 +261,7 @@ You can even run `llama model prompt-format` see all of the templates and their ``` llama model prompt-format -m Llama3.2-3B-Instruct ``` -![alt text](../../../resources/prompt-format.png) +![alt text](/img/prompt-format.png) You will be shown a Markdown formatted description of the model interface and how prompts / messages are formatted for various scenarios. 
diff --git a/docs/getting_started.ipynb b/docs/getting_started.ipynb index 641cf4224..449bd2be1 100644 --- a/docs/getting_started.ipynb +++ b/docs/getting_started.ipynb @@ -1013,7 +1013,7 @@ "\n", "\n", "\n", - "\"drawing\"\n", + "\"drawing\"\n", "\n", "\n", "Agents are characterized by having access to\n", diff --git a/docs/resources/agentic-system.png b/docs/static/img/agentic-system.png similarity index 100% rename from docs/resources/agentic-system.png rename to docs/static/img/agentic-system.png diff --git a/docs/resources/list-templates.png b/docs/static/img/list-templates.png similarity index 100% rename from docs/resources/list-templates.png rename to docs/static/img/list-templates.png diff --git a/docs/resources/llama-stack.png b/docs/static/img/llama-stack.png similarity index 100% rename from docs/resources/llama-stack.png rename to docs/static/img/llama-stack.png diff --git a/docs/resources/model-lifecycle.png b/docs/static/img/model-lifecycle.png similarity index 100% rename from docs/resources/model-lifecycle.png rename to docs/static/img/model-lifecycle.png diff --git a/docs/resources/prompt-format.png b/docs/static/img/prompt-format.png similarity index 100% rename from docs/resources/prompt-format.png rename to docs/static/img/prompt-format.png From e9eb004bf8d6fbd466793e93370eb90e98f28552 Mon Sep 17 00:00:00 2001 From: Matthew Farrellee Date: Mon, 29 Sep 2025 16:14:41 -0400 Subject: [PATCH 06/13] fix: remove inference.completion from docs (#3589) # What does this PR do? now that /v1/inference/completion has been removed, no docs should refer to it this cleans up remaining references ## Test Plan ci Co-authored-by: Ashwin Bharambe --- .../references/python_sdk_reference/index.md | 1 - docs/getting_started.ipynb | 14 ++----- .../Llama_Stack_NVIDIA_E2E_Flow.ipynb | 38 +++++++------------ .../remote/inference/nvidia/NVIDIA.md | 19 ---------- .../remote/post_training/nvidia/README.md | 12 +++--- tests/integration/README.md | 6 +-- 6 files changed, 26 insertions(+), 64 deletions(-) diff --git a/docs/docs/references/python_sdk_reference/index.md b/docs/docs/references/python_sdk_reference/index.md index e0b29363e..bce87e14a 100644 --- a/docs/docs/references/python_sdk_reference/index.md +++ b/docs/docs/references/python_sdk_reference/index.md @@ -217,7 +217,6 @@ from llama_stack_client.types import ( Methods: - client.inference.chat_completion(\*\*params) -> InferenceChatCompletionResponse -- client.inference.completion(\*\*params) -> InferenceCompletionResponse - client.inference.embeddings(\*\*params) -> EmbeddingsResponse ## VectorIo diff --git a/docs/getting_started.ipynb b/docs/getting_started.ipynb index 449bd2be1..56aef2b7d 100644 --- a/docs/getting_started.ipynb +++ b/docs/getting_started.ipynb @@ -824,16 +824,10 @@ "\n", "\n", "user_input = \"Michael Jordan was born in 1963. He played basketball for the Chicago Bulls. He retired in 2003. Extract this information into JSON for me. 
\"\n", - "response = client.inference.completion(\n", - " model_id=\"meta-llama/Llama-3.1-8B-Instruct\",\n", - " content=user_input,\n", - " stream=False,\n", - " sampling_params={\n", - " \"strategy\": {\n", - " \"type\": \"greedy\",\n", - " },\n", - " \"max_tokens\": 50,\n", - " },\n", + "response = client.chat.completions.create(\n", + " model=\"meta-llama/Llama-3.1-8B-Instruct\",\n", + " messages=[{\"role\": \"user\", \"content\": user_input}],\n", + " max_tokens=50,\n", " response_format={\n", " \"type\": \"json_schema\",\n", " \"json_schema\": Output.model_json_schema(),\n", diff --git a/docs/notebooks/nvidia/beginner_e2e/Llama_Stack_NVIDIA_E2E_Flow.ipynb b/docs/notebooks/nvidia/beginner_e2e/Llama_Stack_NVIDIA_E2E_Flow.ipynb index d8f29d999..601276526 100644 --- a/docs/notebooks/nvidia/beginner_e2e/Llama_Stack_NVIDIA_E2E_Flow.ipynb +++ b/docs/notebooks/nvidia/beginner_e2e/Llama_Stack_NVIDIA_E2E_Flow.ipynb @@ -706,20 +706,15 @@ " provider_id=\"nvidia\",\n", ")\n", "\n", - "response = client.inference.completion(\n", - " content=\"Complete the sentence using one word: Roses are red, violets are \",\n", + "response = client.completions.create(\n", + " prompt=\"Complete the sentence using one word: Roses are red, violets are \",\n", " stream=False,\n", - " model_id=CUSTOMIZED_MODEL_DIR,\n", - " sampling_params={\n", - " \"strategy\": {\n", - " \"type\": \"top_p\",\n", - " \"temperature\": 0.7,\n", - " \"top_p\": 0.9\n", - " },\n", - " \"max_tokens\": 20,\n", - " },\n", + " model=CUSTOMIZED_MODEL_DIR,\n", + " temperature=0.7,\n", + " top_p=0.9,\n", + " max_tokens=20,\n", ")\n", - "print(f\"Inference response: {response.content}\")" + "print(f\"Inference response: {response.choices[0].text}\")" ] }, { @@ -1233,20 +1228,15 @@ " provider_id=\"nvidia\",\n", ")\n", "\n", - "response = client.inference.completion(\n", - " content=\"Complete the sentence using one word: Roses are red, violets are \",\n", + "response = client.completions.create(\n", + " prompt=\"Complete the sentence using one word: Roses are red, violets are \",\n", " stream=False,\n", - " model_id=customized_chat_model_dir,\n", - " sampling_params={\n", - " \"strategy\": {\n", - " \"type\": \"top_p\",\n", - " \"temperature\": 0.7,\n", - " \"top_p\": 0.9\n", - " },\n", - " \"max_tokens\": 20,\n", - " },\n", + " model=customized_chat_model_dir,\n", + " temperature=0.7,\n", + " top_p=0.9,\n", + " max_tokens=20,\n", ")\n", - "print(f\"Inference response: {response.content}\")" + "print(f\"Inference response: {response.choices[0].text}\")" ] }, { diff --git a/llama_stack/providers/remote/inference/nvidia/NVIDIA.md b/llama_stack/providers/remote/inference/nvidia/NVIDIA.md index d9c18533a..4cb2dc394 100644 --- a/llama_stack/providers/remote/inference/nvidia/NVIDIA.md +++ b/llama_stack/providers/remote/inference/nvidia/NVIDIA.md @@ -39,25 +39,6 @@ client = LlamaStackAsLibraryClient("nvidia") client.initialize() ``` -### Create Completion - -The following example shows how to create a completion for an NVIDIA NIM. - -> [!NOTE] -> The hosted NVIDIA Llama NIMs (for example ```meta-llama/Llama-3.1-8B-Instruct```) that have ```NVIDIA_BASE_URL="https://integrate.api.nvidia.com"``` do not support the ```completion``` method, while locally deployed NIMs do. 
- -```python -response = client.inference.completion( - model_id="meta-llama/Llama-3.1-8B-Instruct", - content="Complete the sentence using one word: Roses are red, violets are :", - stream=False, - sampling_params={ - "max_tokens": 50, - }, -) -print(f"Response: {response.content}") -``` - ### Create Chat Completion The following example shows how to create a chat completion for an NVIDIA NIM. diff --git a/llama_stack/providers/remote/post_training/nvidia/README.md b/llama_stack/providers/remote/post_training/nvidia/README.md index 6647316df..9b088a615 100644 --- a/llama_stack/providers/remote/post_training/nvidia/README.md +++ b/llama_stack/providers/remote/post_training/nvidia/README.md @@ -140,13 +140,11 @@ client.models.register( #### 2. Inference with the fine-tuned model ```python -response = client.inference.completion( - content="Complete the sentence using one word: Roses are red, violets are ", +response = client.completions.create( + prompt="Complete the sentence using one word: Roses are red, violets are ", stream=False, - model_id="test-example-model@v1", - sampling_params={ - "max_tokens": 50, - }, + model="test-example-model@v1", + max_tokens=50, ) -print(response.content) +print(response.choices[0].text) ``` diff --git a/tests/integration/README.md b/tests/integration/README.md index 467f97e02..b68526410 100644 --- a/tests/integration/README.md +++ b/tests/integration/README.md @@ -178,10 +178,10 @@ Note that when re-recording tests, you must use a Stack pointing to a server (i. ### Basic Test Pattern ```python -def test_basic_completion(llama_stack_client, text_model_id): - response = llama_stack_client.inference.completion( +def test_basic_chat_completion(llama_stack_client, text_model_id): + response = llama_stack_client.inference.chat_completion( model_id=text_model_id, - content=CompletionMessage(role="user", content="Hello"), + messages=[{"role": "user", "content": "Hello"}], ) # Test structure, not AI output quality From 455579a88eb3206a8658eacab44974d59ee3b2fa Mon Sep 17 00:00:00 2001 From: slekkala1 Date: Mon, 29 Sep 2025 13:55:59 -0700 Subject: [PATCH 07/13] fix: Remove deprecated user param in OpenAIResponseObject (#3596) # What does this PR do? 
Just removing the deprecated User param in `OpenAIResponseObject` Closing https://github.com/llamastack/llama-stack/issues/3482 ## Test Plan CI --- docs/static/llama-stack-spec.html | 8 -------- docs/static/llama-stack-spec.yaml | 8 -------- llama_stack/apis/agents/openai_responses.py | 2 -- llama_stack/ui/app/logs/responses/[id]/page.tsx | 1 - llama_stack/ui/components/responses/responses-table.tsx | 1 - 5 files changed, 20 deletions(-) diff --git a/docs/static/llama-stack-spec.html b/docs/static/llama-stack-spec.html index 32ead1764..46d101414 100644 --- a/docs/static/llama-stack-spec.html +++ b/docs/static/llama-stack-spec.html @@ -9456,10 +9456,6 @@ "truncation": { "type": "string", "description": "(Optional) Truncation strategy applied to the response" - }, - "user": { - "type": "string", - "description": "(Optional) User identifier associated with the request" } }, "additionalProperties": false, @@ -13594,10 +13590,6 @@ "type": "string", "description": "(Optional) Truncation strategy applied to the response" }, - "user": { - "type": "string", - "description": "(Optional) User identifier associated with the request" - }, "input": { "type": "array", "items": { diff --git a/docs/static/llama-stack-spec.yaml b/docs/static/llama-stack-spec.yaml index 3b5b92060..65bc9a0b4 100644 --- a/docs/static/llama-stack-spec.yaml +++ b/docs/static/llama-stack-spec.yaml @@ -6884,10 +6884,6 @@ components: type: string description: >- (Optional) Truncation strategy applied to the response - user: - type: string - description: >- - (Optional) User identifier associated with the request additionalProperties: false required: - created_at @@ -10082,10 +10078,6 @@ components: type: string description: >- (Optional) Truncation strategy applied to the response - user: - type: string - description: >- - (Optional) User identifier associated with the request input: type: array items: diff --git a/llama_stack/apis/agents/openai_responses.py b/llama_stack/apis/agents/openai_responses.py index 591992479..b26b11f4f 100644 --- a/llama_stack/apis/agents/openai_responses.py +++ b/llama_stack/apis/agents/openai_responses.py @@ -336,7 +336,6 @@ class OpenAIResponseObject(BaseModel): :param text: Text formatting configuration for the response :param top_p: (Optional) Nucleus sampling parameter used for generation :param truncation: (Optional) Truncation strategy applied to the response - :param user: (Optional) User identifier associated with the request """ created_at: int @@ -354,7 +353,6 @@ class OpenAIResponseObject(BaseModel): text: OpenAIResponseText = OpenAIResponseText(format=OpenAIResponseTextFormat(type="text")) top_p: float | None = None truncation: str | None = None - user: str | None = None @json_schema_type diff --git a/llama_stack/ui/app/logs/responses/[id]/page.tsx b/llama_stack/ui/app/logs/responses/[id]/page.tsx index 922d35531..305e5752a 100644 --- a/llama_stack/ui/app/logs/responses/[id]/page.tsx +++ b/llama_stack/ui/app/logs/responses/[id]/page.tsx @@ -41,7 +41,6 @@ export default function ResponseDetailPage() { temperature: responseData.temperature, top_p: responseData.top_p, truncation: responseData.truncation, - user: responseData.user, }; }; diff --git a/llama_stack/ui/components/responses/responses-table.tsx b/llama_stack/ui/components/responses/responses-table.tsx index 0c0f8e56b..415e9ec2c 100644 --- a/llama_stack/ui/components/responses/responses-table.tsx +++ b/llama_stack/ui/components/responses/responses-table.tsx @@ -43,7 +43,6 @@ const convertResponseListData = ( temperature: 
responseData.temperature,
     top_p: responseData.top_p,
     truncation: responseData.truncation,
-    user: responseData.user,
   };
 };

From ddf3f1735a3fde2f25191244215ba802a439f7d5 Mon Sep 17 00:00:00 2001
From: Michael Dawson
Date: Mon, 29 Sep 2025 17:09:08 -0400
Subject: [PATCH 08/13] fix: ensure usage is requested if telemetry is enabled
 (#3571)

# What does this PR do?
Refs: https://github.com/llamastack/llama-stack/issues/3420

When telemetry is enabled, the router unconditionally expects the usage
attribute to be available and fails if it is not present. Usage is not
currently being requested by litellm_openai_mixin.py for streaming
requests when using the responses API, which means that providers like
vertexai fail if telemetry is enabled and streaming is used. This is
part of the required fix. The other part is in LiteLLM; I plan to
submit a PR for that soon.

## Test Plan
I applied this change along with the LiteLLM change in a Llama Stack
deployment and validated that I could make streaming requests through
the responses API to a gemini model and they would succeed instead of
failing due to the missing usage attribute when telemetry is enabled.

Signed-off-by: Michael Dawson
---
 .../providers/utils/inference/litellm_openai_mixin.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/llama_stack/providers/utils/inference/litellm_openai_mixin.py b/llama_stack/providers/utils/inference/litellm_openai_mixin.py
index 966081e9f..10df664eb 100644
--- a/llama_stack/providers/utils/inference/litellm_openai_mixin.py
+++ b/llama_stack/providers/utils/inference/litellm_openai_mixin.py
@@ -374,6 +374,14 @@ class LiteLLMOpenAIMixin(
         top_p: float | None = None,
         user: str | None = None,
     ) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]:
+        # Add usage tracking for streaming when telemetry is active
+        from llama_stack.providers.utils.telemetry.tracing import get_current_span
+
+        if stream and get_current_span() is not None:
+            if stream_options is None:
+                stream_options = {"include_usage": True}
+            elif "include_usage" not in stream_options:
+                stream_options = {**stream_options, "include_usage": True}
         model_obj = await self.model_store.get_model(model)
         params = await prepare_openai_completion_params(
             model=self.get_litellm_model_name(model_obj.provider_resource_id),

From 5e7fed8bbbbfabf4c7ea02f8daa04a6f85087bdf Mon Sep 17 00:00:00 2001
From: Ashwin Bharambe
Date: Mon, 29 Sep 2025 16:14:35 -0700
Subject: [PATCH 09/13] feat(openai_movement): Change URL structures to kill
 /openai/v1 (part 1) (#3587)

The `/v1/openai/v1` prefix is annoying and now unnecessary given our
clearer focus on how to think about the API surface. Let's kill it for
the 0.3.0 update.

To make client-side changes feasible, we will do this in two parts. This
part adds a new route (sans `/openai/v1`) so the existing client
continues to work, since the server supports both. The next PR will make
the client-side (Stainless) changes, which I will be doing shortly. The
final PR will remove the `/openai/v1` routes.

Note that all these changes will happen rapidly within this release
cycle. The entire set _will be backwards incompatible_.
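
For reviewers: while both prefixes are live, the same request should
succeed against the old and the new path. A minimal sketch of that check
(assuming a local Llama Stack server on the default port 8321 and an
illustrative model id; adjust both for your deployment):

```python
# Both URL structures route to the same handler during the transition.
import httpx

BASE = "http://localhost:8321"  # assumed local server; default stack port
payload = {
    "model": "meta-llama/Llama-3.1-8B-Instruct",  # illustrative model id
    "messages": [{"role": "user", "content": "Hello"}],
}

old = httpx.post(f"{BASE}/v1/openai/v1/chat/completions", json=payload, timeout=60)
new = httpx.post(f"{BASE}/v1/chat/completions", json=payload, timeout=60)
assert old.status_code == new.status_code == 200
```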
--- .github/workflows/conformance.yml | 4 +- docs/static/llama-stack-spec.html | 1534 +++++++++++++++++ docs/static/llama-stack-spec.yaml | 1142 ++++++++++++ llama_stack/apis/agents/agents.py | 5 + llama_stack/apis/batches/batches.py | 4 + llama_stack/apis/files/files.py | 5 + llama_stack/apis/inference/inference.py | 5 + llama_stack/apis/safety/safety.py | 1 + llama_stack/apis/vector_io/vector_io.py | 16 + tests/integration/fixtures/common.py | 2 +- .../inference/test_openai_embeddings.py | 2 +- 11 files changed, 2715 insertions(+), 5 deletions(-) diff --git a/.github/workflows/conformance.yml b/.github/workflows/conformance.yml index b19b77cce..5eddb193f 100644 --- a/.github/workflows/conformance.yml +++ b/.github/workflows/conformance.yml @@ -66,6 +66,4 @@ jobs: # This step will fail if incompatible changes are detected, preventing breaking changes from being merged - name: Run OpenAPI Breaking Change Diff run: | - oasdiff breaking --fail-on ERR base/docs/static/llama-stack-spec.yaml docs/static/llama-stack-spec.yaml --match-path '^/v1/openai/v1' \ - --match-path '^/v1/vector-io' \ - --match-path '^/v1/vector-dbs' + oasdiff breaking --fail-on ERR base/docs/static/llama-stack-spec.yaml docs/static/llama-stack-spec.yaml --match-path '^/v1/' diff --git a/docs/static/llama-stack-spec.html b/docs/static/llama-stack-spec.html index 46d101414..2072af745 100644 --- a/docs/static/llama-stack-spec.html +++ b/docs/static/llama-stack-spec.html @@ -427,6 +427,124 @@ } } }, + "/v1/responses": { + "get": { + "responses": { + "200": { + "description": "A ListOpenAIResponseObject.", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ListOpenAIResponseObject" + } + } + } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" + } + }, + "tags": [ + "Agents" + ], + "summary": "List all OpenAI responses.", + "description": "List all OpenAI responses.", + "parameters": [ + { + "name": "after", + "in": "query", + "description": "The ID of the last response to return.", + "required": false, + "schema": { + "type": "string" + } + }, + { + "name": "limit", + "in": "query", + "description": "The number of responses to return.", + "required": false, + "schema": { + "type": "integer" + } + }, + { + "name": "model", + "in": "query", + "description": "The model to filter responses by.", + "required": false, + "schema": { + "type": "string" + } + }, + { + "name": "order", + "in": "query", + "description": "The order to sort responses by when sorted by created_at ('asc' or 'desc').", + "required": false, + "schema": { + "$ref": "#/components/schemas/Order" + } + } + ] + }, + "post": { + "responses": { + "200": { + "description": "An OpenAIResponseObject.", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/OpenAIResponseObject" + } + }, + "text/event-stream": { + "schema": { + "$ref": "#/components/schemas/OpenAIResponseObjectStream" + } + } + } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" + } + }, + "tags": [ + "Agents" + ], + "summary": "Create a new OpenAI response.", + "description": "Create a new 
OpenAI response.", + "parameters": [], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/CreateOpenaiResponseRequest" + } + } + }, + "required": true + } + } + }, "/v1/openai/v1/responses": { "get": { "responses": { @@ -809,6 +927,92 @@ ] } }, + "/v1/responses/{response_id}": { + "get": { + "responses": { + "200": { + "description": "An OpenAIResponseObject.", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/OpenAIResponseObject" + } + } + } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" + } + }, + "tags": [ + "Agents" + ], + "summary": "Retrieve an OpenAI response by its ID.", + "description": "Retrieve an OpenAI response by its ID.", + "parameters": [ + { + "name": "response_id", + "in": "path", + "description": "The ID of the OpenAI response to retrieve.", + "required": true, + "schema": { + "type": "string" + } + } + ] + }, + "delete": { + "responses": { + "200": { + "description": "An OpenAIDeleteResponseObject", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/OpenAIDeleteResponseObject" + } + } + } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" + } + }, + "tags": [ + "Agents" + ], + "summary": "Delete an OpenAI response by its ID.", + "description": "Delete an OpenAI response by its ID.", + "parameters": [ + { + "name": "response_id", + "in": "path", + "description": "The ID of the OpenAI response to delete.", + "required": true, + "schema": { + "type": "string" + } + } + ] + } + }, "/v1/openai/v1/responses/{response_id}": { "get": { "responses": { @@ -1434,6 +1638,50 @@ ] } }, + "/v1/chat/completions/{completion_id}": { + "get": { + "responses": { + "200": { + "description": "A OpenAICompletionWithInputMessages.", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/OpenAICompletionWithInputMessages" + } + } + } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" + } + }, + "tags": [ + "Inference" + ], + "summary": "Describe a chat completion by its ID.", + "description": "Describe a chat completion by its ID.", + "parameters": [ + { + "name": "completion_id", + "in": "path", + "description": "ID of the chat completion.", + "required": true, + "schema": { + "type": "string" + } + } + ] + } + }, "/v1/openai/v1/chat/completions/{completion_id}": { "get": { "responses": { @@ -3149,6 +3397,126 @@ } } }, + "/v1/chat/completions": { + "get": { + "responses": { + "200": { + "description": "A ListOpenAIChatCompletionResponse.", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ListOpenAIChatCompletionResponse" + } + } + } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + 
}, + "default": { + "$ref": "#/components/responses/DefaultError" + } + }, + "tags": [ + "Inference" + ], + "summary": "List all chat completions.", + "description": "List all chat completions.", + "parameters": [ + { + "name": "after", + "in": "query", + "description": "The ID of the last chat completion to return.", + "required": false, + "schema": { + "type": "string" + } + }, + { + "name": "limit", + "in": "query", + "description": "The maximum number of chat completions to return.", + "required": false, + "schema": { + "type": "integer" + } + }, + { + "name": "model", + "in": "query", + "description": "The model to filter by.", + "required": false, + "schema": { + "type": "string" + } + }, + { + "name": "order", + "in": "query", + "description": "The order to sort the chat completions by: \"asc\" or \"desc\". Defaults to \"desc\".", + "required": false, + "schema": { + "$ref": "#/components/schemas/Order" + } + } + ] + }, + "post": { + "responses": { + "200": { + "description": "An OpenAIChatCompletion.", + "content": { + "application/json": { + "schema": { + "oneOf": [ + { + "$ref": "#/components/schemas/OpenAIChatCompletion" + }, + { + "$ref": "#/components/schemas/OpenAIChatCompletionChunk" + } + ] + } + } + } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" + } + }, + "tags": [ + "Inference" + ], + "summary": "Generate an OpenAI-compatible chat completion for the given messages using the specified model.", + "description": "Generate an OpenAI-compatible chat completion for the given messages using the specified model.", + "parameters": [], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/OpenaiChatCompletionRequest" + } + } + }, + "required": true + } + } + }, "/v1/openai/v1/chat/completions": { "get": { "responses": { @@ -3421,6 +3789,98 @@ } } }, + "/v1/responses/{response_id}/input_items": { + "get": { + "responses": { + "200": { + "description": "An ListOpenAIResponseInputItem.", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ListOpenAIResponseInputItem" + } + } + } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" + } + }, + "tags": [ + "Agents" + ], + "summary": "List input items for a given OpenAI response.", + "description": "List input items for a given OpenAI response.", + "parameters": [ + { + "name": "response_id", + "in": "path", + "description": "The ID of the response to retrieve input items for.", + "required": true, + "schema": { + "type": "string" + } + }, + { + "name": "after", + "in": "query", + "description": "An item ID to list items after, used for pagination.", + "required": false, + "schema": { + "type": "string" + } + }, + { + "name": "before", + "in": "query", + "description": "An item ID to list items before, used for pagination.", + "required": false, + "schema": { + "type": "string" + } + }, + { + "name": "include", + "in": "query", + "description": "Additional fields to include in the response.", + "required": false, + "schema": { + "type": "array", + "items": { + "type": "string" + } + } + }, + { + "name": 
"limit", + "in": "query", + "description": "A limit on the number of objects to be returned. Limit can range between 1 and 100, and the default is 20.", + "required": false, + "schema": { + "type": "integer" + } + }, + { + "name": "order", + "in": "query", + "description": "The order to return the input items in. Default is desc.", + "required": false, + "schema": { + "$ref": "#/components/schemas/Order" + } + } + ] + } + }, "/v1/openai/v1/responses/{response_id}/input_items": { "get": { "responses": { @@ -4049,6 +4509,147 @@ } } }, + "/v1/vector_stores/{vector_store_id}/files": { + "get": { + "responses": { + "200": { + "description": "A VectorStoreListFilesResponse containing the list of files.", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/VectorStoreListFilesResponse" + } + } + } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" + } + }, + "tags": [ + "VectorIO" + ], + "summary": "List files in a vector store.", + "description": "List files in a vector store.", + "parameters": [ + { + "name": "vector_store_id", + "in": "path", + "description": "The ID of the vector store to list files from.", + "required": true, + "schema": { + "type": "string" + } + }, + { + "name": "limit", + "in": "query", + "description": "(Optional) A limit on the number of objects to be returned. Limit can range between 1 and 100, and the default is 20.", + "required": false, + "schema": { + "type": "integer" + } + }, + { + "name": "order", + "in": "query", + "description": "(Optional) Sort order by the `created_at` timestamp of the objects. `asc` for ascending order and `desc` for descending order.", + "required": false, + "schema": { + "type": "string" + } + }, + { + "name": "after", + "in": "query", + "description": "(Optional) A cursor for use in pagination. `after` is an object ID that defines your place in the list.", + "required": false, + "schema": { + "type": "string" + } + }, + { + "name": "before", + "in": "query", + "description": "(Optional) A cursor for use in pagination. 
`before` is an object ID that defines your place in the list.", + "required": false, + "schema": { + "type": "string" + } + }, + { + "name": "filter", + "in": "query", + "description": "(Optional) Filter by file status to only return files with the specified status.", + "required": false, + "schema": { + "$ref": "#/components/schemas/VectorStoreFileStatus" + } + } + ] + }, + "post": { + "responses": { + "200": { + "description": "A VectorStoreFileObject representing the attached file.", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/VectorStoreFileObject" + } + } + } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" + } + }, + "tags": [ + "VectorIO" + ], + "summary": "Attach a file to a vector store.", + "description": "Attach a file to a vector store.", + "parameters": [ + { + "name": "vector_store_id", + "in": "path", + "description": "The ID of the vector store to attach the file to.", + "required": true, + "schema": { + "type": "string" + } + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/OpenaiAttachFileToVectorStoreRequest" + } + } + }, + "required": true + } + } + }, "/v1/openai/v1/vector_stores/{vector_store_id}/files": { "get": { "responses": { @@ -4190,6 +4791,50 @@ } } }, + "/v1/completions": { + "post": { + "responses": { + "200": { + "description": "An OpenAICompletion.", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/OpenAICompletion" + } + } + } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" + } + }, + "tags": [ + "Inference" + ], + "summary": "Generate an OpenAI-compatible completion for the given prompt using the specified model.", + "description": "Generate an OpenAI-compatible completion for the given prompt using the specified model.", + "parameters": [], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/OpenaiCompletionRequest" + } + } + }, + "required": true + } + } + }, "/v1/openai/v1/completions": { "post": { "responses": { @@ -4234,6 +4879,119 @@ } } }, + "/v1/vector_stores": { + "get": { + "responses": { + "200": { + "description": "A VectorStoreListResponse containing the list of vector stores.", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/VectorStoreListResponse" + } + } + } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" + } + }, + "tags": [ + "VectorIO" + ], + "summary": "Returns a list of vector stores.", + "description": "Returns a list of vector stores.", + "parameters": [ + { + "name": "limit", + "in": "query", + "description": "A limit on the number of objects to be returned. 
Limit can range between 1 and 100, and the default is 20.", + "required": false, + "schema": { + "type": "integer" + } + }, + { + "name": "order", + "in": "query", + "description": "Sort order by the `created_at` timestamp of the objects. `asc` for ascending order and `desc` for descending order.", + "required": false, + "schema": { + "type": "string" + } + }, + { + "name": "after", + "in": "query", + "description": "A cursor for use in pagination. `after` is an object ID that defines your place in the list.", + "required": false, + "schema": { + "type": "string" + } + }, + { + "name": "before", + "in": "query", + "description": "A cursor for use in pagination. `before` is an object ID that defines your place in the list.", + "required": false, + "schema": { + "type": "string" + } + } + ] + }, + "post": { + "responses": { + "200": { + "description": "A VectorStoreObject representing the created vector store.", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/VectorStoreObject" + } + } + } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" + } + }, + "tags": [ + "VectorIO" + ], + "summary": "Creates a vector store.", + "description": "Creates a vector store.", + "parameters": [], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/OpenaiCreateVectorStoreRequest" + } + } + }, + "required": true + } + } + }, "/v1/openai/v1/vector_stores": { "get": { "responses": { @@ -4347,6 +5105,92 @@ } } }, + "/v1/files/{file_id}": { + "get": { + "responses": { + "200": { + "description": "An OpenAIFileObject containing file information.", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/OpenAIFileObject" + } + } + } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" + } + }, + "tags": [ + "Files" + ], + "summary": "Returns information about a specific file.", + "description": "Returns information about a specific file.", + "parameters": [ + { + "name": "file_id", + "in": "path", + "description": "The ID of the file to use for this request.", + "required": true, + "schema": { + "type": "string" + } + } + ] + }, + "delete": { + "responses": { + "200": { + "description": "An OpenAIFileDeleteResponse indicating successful deletion.", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/OpenAIFileDeleteResponse" + } + } + } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" + } + }, + "tags": [ + "Files" + ], + "summary": "Delete a file.", + "description": "Delete a file.", + "parameters": [ + { + "name": "file_id", + "in": "path", + "description": "The ID of the file to use for this request.", + "required": true, + "schema": { + "type": "string" + } + } + ] + } + }, "/v1/openai/v1/files/{file_id}": { "get": { "responses": { @@ -4433,6 +5277,144 @@ ] } }, + 
"/v1/vector_stores/{vector_store_id}": { + "get": { + "responses": { + "200": { + "description": "A VectorStoreObject representing the vector store.", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/VectorStoreObject" + } + } + } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" + } + }, + "tags": [ + "VectorIO" + ], + "summary": "Retrieves a vector store.", + "description": "Retrieves a vector store.", + "parameters": [ + { + "name": "vector_store_id", + "in": "path", + "description": "The ID of the vector store to retrieve.", + "required": true, + "schema": { + "type": "string" + } + } + ] + }, + "post": { + "responses": { + "200": { + "description": "A VectorStoreObject representing the updated vector store.", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/VectorStoreObject" + } + } + } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" + } + }, + "tags": [ + "VectorIO" + ], + "summary": "Updates a vector store.", + "description": "Updates a vector store.", + "parameters": [ + { + "name": "vector_store_id", + "in": "path", + "description": "The ID of the vector store to update.", + "required": true, + "schema": { + "type": "string" + } + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/OpenaiUpdateVectorStoreRequest" + } + } + }, + "required": true + } + }, + "delete": { + "responses": { + "200": { + "description": "A VectorStoreDeleteResponse indicating the deletion status.", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/VectorStoreDeleteResponse" + } + } + } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" + } + }, + "tags": [ + "VectorIO" + ], + "summary": "Delete a vector store.", + "description": "Delete a vector store.", + "parameters": [ + { + "name": "vector_store_id", + "in": "path", + "description": "The ID of the vector store to delete.", + "required": true, + "schema": { + "type": "string" + } + } + ] + } + }, "/v1/openai/v1/vector_stores/{vector_store_id}": { "get": { "responses": { @@ -4571,6 +5553,171 @@ ] } }, + "/v1/vector_stores/{vector_store_id}/files/{file_id}": { + "get": { + "responses": { + "200": { + "description": "A VectorStoreFileObject representing the file.", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/VectorStoreFileObject" + } + } + } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" + } + }, + "tags": [ + "VectorIO" + ], + "summary": "Retrieves a vector store file.", + "description": "Retrieves a vector store file.", + "parameters": [ + { + "name": 
"vector_store_id", + "in": "path", + "description": "The ID of the vector store containing the file to retrieve.", + "required": true, + "schema": { + "type": "string" + } + }, + { + "name": "file_id", + "in": "path", + "description": "The ID of the file to retrieve.", + "required": true, + "schema": { + "type": "string" + } + } + ] + }, + "post": { + "responses": { + "200": { + "description": "A VectorStoreFileObject representing the updated file.", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/VectorStoreFileObject" + } + } + } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" + } + }, + "tags": [ + "VectorIO" + ], + "summary": "Updates a vector store file.", + "description": "Updates a vector store file.", + "parameters": [ + { + "name": "vector_store_id", + "in": "path", + "description": "The ID of the vector store containing the file to update.", + "required": true, + "schema": { + "type": "string" + } + }, + { + "name": "file_id", + "in": "path", + "description": "The ID of the file to update.", + "required": true, + "schema": { + "type": "string" + } + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/OpenaiUpdateVectorStoreFileRequest" + } + } + }, + "required": true + } + }, + "delete": { + "responses": { + "200": { + "description": "A VectorStoreFileDeleteResponse indicating the deletion status.", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/VectorStoreFileDeleteResponse" + } + } + } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" + } + }, + "tags": [ + "VectorIO" + ], + "summary": "Delete a vector store file.", + "description": "Delete a vector store file.", + "parameters": [ + { + "name": "vector_store_id", + "in": "path", + "description": "The ID of the vector store containing the file to delete.", + "required": true, + "schema": { + "type": "string" + } + }, + { + "name": "file_id", + "in": "path", + "description": "The ID of the file to delete.", + "required": true, + "schema": { + "type": "string" + } + } + ] + } + }, "/v1/openai/v1/vector_stores/{vector_store_id}/files/{file_id}": { "get": { "responses": { @@ -4736,6 +5883,50 @@ ] } }, + "/v1/embeddings": { + "post": { + "responses": { + "200": { + "description": "An OpenAIEmbeddingsResponse containing the embeddings.", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/OpenAIEmbeddingsResponse" + } + } + } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" + } + }, + "tags": [ + "Inference" + ], + "summary": "Generate OpenAI-compatible embeddings for the given input using the specified model.", + "description": "Generate OpenAI-compatible embeddings for the given input using the specified model.", + "parameters": [], + "requestBody": { + "content": { + "application/json": { + 
"schema": { + "$ref": "#/components/schemas/OpenaiEmbeddingsRequest" + } + } + }, + "required": true + } + } + }, "/v1/openai/v1/embeddings": { "post": { "responses": { @@ -4780,6 +5971,154 @@ } } }, + "/v1/files": { + "get": { + "responses": { + "200": { + "description": "An ListOpenAIFileResponse containing the list of files.", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ListOpenAIFileResponse" + } + } + } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" + } + }, + "tags": [ + "Files" + ], + "summary": "Returns a list of files that belong to the user's organization.", + "description": "Returns a list of files that belong to the user's organization.", + "parameters": [ + { + "name": "after", + "in": "query", + "description": "A cursor for use in pagination. `after` is an object ID that defines your place in the list. For instance, if you make a list request and receive 100 objects, ending with obj_foo, your subsequent call can include after=obj_foo in order to fetch the next page of the list.", + "required": false, + "schema": { + "type": "string" + } + }, + { + "name": "limit", + "in": "query", + "description": "A limit on the number of objects to be returned. Limit can range between 1 and 10,000, and the default is 10,000.", + "required": false, + "schema": { + "type": "integer" + } + }, + { + "name": "order", + "in": "query", + "description": "Sort order by the `created_at` timestamp of the objects. `asc` for ascending order and `desc` for descending order.", + "required": false, + "schema": { + "$ref": "#/components/schemas/Order" + } + }, + { + "name": "purpose", + "in": "query", + "description": "Only return files with the given purpose.", + "required": false, + "schema": { + "$ref": "#/components/schemas/OpenAIFilePurpose" + } + } + ] + }, + "post": { + "responses": { + "200": { + "description": "An OpenAIFileObject representing the uploaded file.", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/OpenAIFileObject" + } + } + } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" + } + }, + "tags": [ + "Files" + ], + "summary": "Upload a file that can be used across various endpoints.", + "description": "Upload a file that can be used across various endpoints.\nThe file upload should be a multipart form request with:\n- file: The File object (not file name) to be uploaded.\n- purpose: The intended purpose of the uploaded file.\n- expires_after: Optional form values describing expiration for the file. Expected expires_after[anchor] = \"created_at\", expires_after[seconds] = {integer}. 
Seconds must be between 3600 and 2592000 (1 hour to 30 days).", + "parameters": [], + "requestBody": { + "content": { + "multipart/form-data": { + "schema": { + "type": "object", + "properties": { + "file": { + "type": "string", + "format": "binary" + }, + "purpose": { + "$ref": "#/components/schemas/OpenAIFilePurpose" + }, + "expires_after_anchor": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ] + }, + "expires_after_seconds": { + "oneOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ] + } + }, + "required": [ + "file", + "purpose", + "expires_after_anchor", + "expires_after_seconds" + ] + } + } + }, + "required": true + } + } + }, "/v1/openai/v1/files": { "get": { "responses": { @@ -4962,6 +6301,50 @@ "parameters": [] } }, + "/v1/files/{file_id}/content": { + "get": { + "responses": { + "200": { + "description": "The raw file content as a binary response.", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Response" + } + } + } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" + } + }, + "tags": [ + "Files" + ], + "summary": "Returns the contents of the specified file.", + "description": "Returns the contents of the specified file.", + "parameters": [ + { + "name": "file_id", + "in": "path", + "description": "The ID of the file to use for this request.", + "required": true, + "schema": { + "type": "string" + } + } + ] + } + }, "/v1/openai/v1/files/{file_id}/content": { "get": { "responses": { @@ -5006,6 +6389,59 @@ ] } }, + "/v1/vector_stores/{vector_store_id}/files/{file_id}/content": { + "get": { + "responses": { + "200": { + "description": "A list of InterleavedContent representing the file contents.", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/VectorStoreFileContentsResponse" + } + } + } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" + } + }, + "tags": [ + "VectorIO" + ], + "summary": "Retrieves the contents of a vector store file.", + "description": "Retrieves the contents of a vector store file.", + "parameters": [ + { + "name": "vector_store_id", + "in": "path", + "description": "The ID of the vector store containing the file to retrieve.", + "required": true, + "schema": { + "type": "string" + } + }, + { + "name": "file_id", + "in": "path", + "description": "The ID of the file to retrieve.", + "required": true, + "schema": { + "type": "string" + } + } + ] + } + }, "/v1/openai/v1/vector_stores/{vector_store_id}/files/{file_id}/content": { "get": { "responses": { @@ -5059,6 +6495,60 @@ ] } }, + "/v1/vector_stores/{vector_store_id}/search": { + "post": { + "responses": { + "200": { + "description": "A VectorStoreSearchResponse containing the search results.", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/VectorStoreSearchResponsePage" + } + } + } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": 
"#/components/responses/DefaultError" + } + }, + "tags": [ + "VectorIO" + ], + "summary": "Search for chunks in a vector store.", + "description": "Search for chunks in a vector store.\nSearches a vector store for relevant chunks based on a query and optional file attribute filters.", + "parameters": [ + { + "name": "vector_store_id", + "in": "path", + "description": "The ID of the vector store to search.", + "required": true, + "schema": { + "type": "string" + } + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/OpenaiSearchVectorStoreRequest" + } + } + }, + "required": true + } + } + }, "/v1/openai/v1/vector_stores/{vector_store_id}/search": { "post": { "responses": { @@ -5660,6 +7150,50 @@ } } }, + "/v1/moderations": { + "post": { + "responses": { + "200": { + "description": "A moderation object.", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ModerationObject" + } + } + } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" + } + }, + "tags": [ + "Safety" + ], + "summary": "Classifies if text and/or image inputs are potentially harmful.", + "description": "Classifies if text and/or image inputs are potentially harmful.", + "parameters": [], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/RunModerationRequest" + } + } + }, + "required": true + } + } + }, "/v1/openai/v1/moderations": { "post": { "responses": { diff --git a/docs/static/llama-stack-spec.yaml b/docs/static/llama-stack-spec.yaml index 65bc9a0b4..7b51116ba 100644 --- a/docs/static/llama-stack-spec.yaml +++ b/docs/static/llama-stack-spec.yaml @@ -286,6 +286,87 @@ paths: schema: $ref: '#/components/schemas/CreateAgentTurnRequest' required: true + /v1/responses: + get: + responses: + '200': + description: A ListOpenAIResponseObject. + content: + application/json: + schema: + $ref: '#/components/schemas/ListOpenAIResponseObject' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Agents + summary: List all OpenAI responses. + description: List all OpenAI responses. + parameters: + - name: after + in: query + description: The ID of the last response to return. + required: false + schema: + type: string + - name: limit + in: query + description: The number of responses to return. + required: false + schema: + type: integer + - name: model + in: query + description: The model to filter responses by. + required: false + schema: + type: string + - name: order + in: query + description: >- + The order to sort responses by when sorted by created_at ('asc' or 'desc'). + required: false + schema: + $ref: '#/components/schemas/Order' + post: + responses: + '200': + description: An OpenAIResponseObject. 
+ content: + application/json: + schema: + $ref: '#/components/schemas/OpenAIResponseObject' + text/event-stream: + schema: + $ref: '#/components/schemas/OpenAIResponseObjectStream' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Agents + summary: Create a new OpenAI response. + description: Create a new OpenAI response. + parameters: [] + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/CreateOpenaiResponseRequest' + required: true /v1/openai/v1/responses: get: responses: @@ -558,6 +639,66 @@ paths: required: true schema: type: string + /v1/responses/{response_id}: + get: + responses: + '200': + description: An OpenAIResponseObject. + content: + application/json: + schema: + $ref: '#/components/schemas/OpenAIResponseObject' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Agents + summary: Retrieve an OpenAI response by its ID. + description: Retrieve an OpenAI response by its ID. + parameters: + - name: response_id + in: path + description: >- + The ID of the OpenAI response to retrieve. + required: true + schema: + type: string + delete: + responses: + '200': + description: An OpenAIDeleteResponseObject + content: + application/json: + schema: + $ref: '#/components/schemas/OpenAIDeleteResponseObject' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Agents + summary: Delete an OpenAI response by its ID. + description: Delete an OpenAI response by its ID. + parameters: + - name: response_id + in: path + description: The ID of the OpenAI response to delete. + required: true + schema: + type: string /v1/openai/v1/responses/{response_id}: get: responses: @@ -998,6 +1139,36 @@ paths: required: true schema: type: string + /v1/chat/completions/{completion_id}: + get: + responses: + '200': + description: A OpenAICompletionWithInputMessages. + content: + application/json: + schema: + $ref: '#/components/schemas/OpenAICompletionWithInputMessages' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Inference + summary: Describe a chat completion by its ID. + description: Describe a chat completion by its ID. + parameters: + - name: completion_id + in: path + description: ID of the chat completion. + required: true + schema: + type: string /v1/openai/v1/chat/completions/{completion_id}: get: responses: @@ -2224,6 +2395,93 @@ paths: schema: $ref: '#/components/schemas/RegisterBenchmarkRequest' required: true + /v1/chat/completions: + get: + responses: + '200': + description: A ListOpenAIChatCompletionResponse. 
+ content: + application/json: + schema: + $ref: '#/components/schemas/ListOpenAIChatCompletionResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Inference + summary: List all chat completions. + description: List all chat completions. + parameters: + - name: after + in: query + description: >- + The ID of the last chat completion to return. + required: false + schema: + type: string + - name: limit + in: query + description: >- + The maximum number of chat completions to return. + required: false + schema: + type: integer + - name: model + in: query + description: The model to filter by. + required: false + schema: + type: string + - name: order + in: query + description: >- + The order to sort the chat completions by: "asc" or "desc". Defaults to + "desc". + required: false + schema: + $ref: '#/components/schemas/Order' + post: + responses: + '200': + description: An OpenAIChatCompletion. + content: + application/json: + schema: + oneOf: + - $ref: '#/components/schemas/OpenAIChatCompletion' + - $ref: '#/components/schemas/OpenAIChatCompletionChunk' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Inference + summary: >- + Generate an OpenAI-compatible chat completion for the given messages using + the specified model. + description: >- + Generate an OpenAI-compatible chat completion for the given messages using + the specified model. + parameters: [] + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/OpenaiChatCompletionRequest' + required: true /v1/openai/v1/chat/completions: get: responses: @@ -2417,6 +2675,77 @@ paths: schema: $ref: '#/components/schemas/RegisterModelRequest' required: true + /v1/responses/{response_id}/input_items: + get: + responses: + '200': + description: An ListOpenAIResponseInputItem. + content: + application/json: + schema: + $ref: '#/components/schemas/ListOpenAIResponseInputItem' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Agents + summary: >- + List input items for a given OpenAI response. + description: >- + List input items for a given OpenAI response. + parameters: + - name: response_id + in: path + description: >- + The ID of the response to retrieve input items for. + required: true + schema: + type: string + - name: after + in: query + description: >- + An item ID to list items after, used for pagination. + required: false + schema: + type: string + - name: before + in: query + description: >- + An item ID to list items before, used for pagination. + required: false + schema: + type: string + - name: include + in: query + description: >- + Additional fields to include in the response. + required: false + schema: + type: array + items: + type: string + - name: limit + in: query + description: >- + A limit on the number of objects to be returned. Limit can range between + 1 and 100, and the default is 20. 
+ required: false + schema: + type: integer + - name: order + in: query + description: >- + The order to return the input items in. Default is desc. + required: false + schema: + $ref: '#/components/schemas/Order' /v1/openai/v1/responses/{response_id}/input_items: get: responses: @@ -2871,6 +3200,115 @@ paths: schema: $ref: '#/components/schemas/LogEventRequest' required: true + /v1/vector_stores/{vector_store_id}/files: + get: + responses: + '200': + description: >- + A VectorStoreListFilesResponse containing the list of files. + content: + application/json: + schema: + $ref: '#/components/schemas/VectorStoreListFilesResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - VectorIO + summary: List files in a vector store. + description: List files in a vector store. + parameters: + - name: vector_store_id + in: path + description: >- + The ID of the vector store to list files from. + required: true + schema: + type: string + - name: limit + in: query + description: >- + (Optional) A limit on the number of objects to be returned. Limit can + range between 1 and 100, and the default is 20. + required: false + schema: + type: integer + - name: order + in: query + description: >- + (Optional) Sort order by the `created_at` timestamp of the objects. `asc` + for ascending order and `desc` for descending order. + required: false + schema: + type: string + - name: after + in: query + description: >- + (Optional) A cursor for use in pagination. `after` is an object ID that + defines your place in the list. + required: false + schema: + type: string + - name: before + in: query + description: >- + (Optional) A cursor for use in pagination. `before` is an object ID that + defines your place in the list. + required: false + schema: + type: string + - name: filter + in: query + description: >- + (Optional) Filter by file status to only return files with the specified + status. + required: false + schema: + $ref: '#/components/schemas/VectorStoreFileStatus' + post: + responses: + '200': + description: >- + A VectorStoreFileObject representing the attached file. + content: + application/json: + schema: + $ref: '#/components/schemas/VectorStoreFileObject' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - VectorIO + summary: Attach a file to a vector store. + description: Attach a file to a vector store. + parameters: + - name: vector_store_id + in: path + description: >- + The ID of the vector store to attach the file to. + required: true + schema: + type: string + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/OpenaiAttachFileToVectorStoreRequest' + required: true /v1/openai/v1/vector_stores/{vector_store_id}/files: get: responses: @@ -2980,6 +3418,40 @@ paths: schema: $ref: '#/components/schemas/OpenaiAttachFileToVectorStoreRequest' required: true + /v1/completions: + post: + responses: + '200': + description: An OpenAICompletion. 
+ content: + application/json: + schema: + $ref: '#/components/schemas/OpenAICompletion' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Inference + summary: >- + Generate an OpenAI-compatible completion for the given prompt using the specified + model. + description: >- + Generate an OpenAI-compatible completion for the given prompt using the specified + model. + parameters: [] + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/OpenaiCompletionRequest' + required: true /v1/openai/v1/completions: post: responses: @@ -3014,6 +3486,93 @@ paths: schema: $ref: '#/components/schemas/OpenaiCompletionRequest' required: true + /v1/vector_stores: + get: + responses: + '200': + description: >- + A VectorStoreListResponse containing the list of vector stores. + content: + application/json: + schema: + $ref: '#/components/schemas/VectorStoreListResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - VectorIO + summary: Returns a list of vector stores. + description: Returns a list of vector stores. + parameters: + - name: limit + in: query + description: >- + A limit on the number of objects to be returned. Limit can range between + 1 and 100, and the default is 20. + required: false + schema: + type: integer + - name: order + in: query + description: >- + Sort order by the `created_at` timestamp of the objects. `asc` for ascending + order and `desc` for descending order. + required: false + schema: + type: string + - name: after + in: query + description: >- + A cursor for use in pagination. `after` is an object ID that defines your + place in the list. + required: false + schema: + type: string + - name: before + in: query + description: >- + A cursor for use in pagination. `before` is an object ID that defines + your place in the list. + required: false + schema: + type: string + post: + responses: + '200': + description: >- + A VectorStoreObject representing the created vector store. + content: + application/json: + schema: + $ref: '#/components/schemas/VectorStoreObject' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - VectorIO + summary: Creates a vector store. + description: Creates a vector store. + parameters: [] + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/OpenaiCreateVectorStoreRequest' + required: true /v1/openai/v1/vector_stores: get: responses: @@ -3101,6 +3660,71 @@ paths: schema: $ref: '#/components/schemas/OpenaiCreateVectorStoreRequest' required: true + /v1/files/{file_id}: + get: + responses: + '200': + description: >- + An OpenAIFileObject containing file information. 
+ content: + application/json: + schema: + $ref: '#/components/schemas/OpenAIFileObject' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Files + summary: >- + Returns information about a specific file. + description: >- + Returns information about a specific file. + parameters: + - name: file_id + in: path + description: >- + The ID of the file to use for this request. + required: true + schema: + type: string + delete: + responses: + '200': + description: >- + An OpenAIFileDeleteResponse indicating successful deletion. + content: + application/json: + schema: + $ref: '#/components/schemas/OpenAIFileDeleteResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Files + summary: Delete a file. + description: Delete a file. + parameters: + - name: file_id + in: path + description: >- + The ID of the file to use for this request. + required: true + schema: + type: string /v1/openai/v1/files/{file_id}: get: responses: @@ -3166,6 +3790,103 @@ paths: required: true schema: type: string + /v1/vector_stores/{vector_store_id}: + get: + responses: + '200': + description: >- + A VectorStoreObject representing the vector store. + content: + application/json: + schema: + $ref: '#/components/schemas/VectorStoreObject' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - VectorIO + summary: Retrieves a vector store. + description: Retrieves a vector store. + parameters: + - name: vector_store_id + in: path + description: The ID of the vector store to retrieve. + required: true + schema: + type: string + post: + responses: + '200': + description: >- + A VectorStoreObject representing the updated vector store. + content: + application/json: + schema: + $ref: '#/components/schemas/VectorStoreObject' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - VectorIO + summary: Updates a vector store. + description: Updates a vector store. + parameters: + - name: vector_store_id + in: path + description: The ID of the vector store to update. + required: true + schema: + type: string + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/OpenaiUpdateVectorStoreRequest' + required: true + delete: + responses: + '200': + description: >- + A VectorStoreDeleteResponse indicating the deletion status. + content: + application/json: + schema: + $ref: '#/components/schemas/VectorStoreDeleteResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - VectorIO + summary: Delete a vector store. + description: Delete a vector store. 
+ parameters: + - name: vector_store_id + in: path + description: The ID of the vector store to delete. + required: true + schema: + type: string /v1/openai/v1/vector_stores/{vector_store_id}: get: responses: @@ -3263,6 +3984,124 @@ paths: required: true schema: type: string + /v1/vector_stores/{vector_store_id}/files/{file_id}: + get: + responses: + '200': + description: >- + A VectorStoreFileObject representing the file. + content: + application/json: + schema: + $ref: '#/components/schemas/VectorStoreFileObject' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - VectorIO + summary: Retrieves a vector store file. + description: Retrieves a vector store file. + parameters: + - name: vector_store_id + in: path + description: >- + The ID of the vector store containing the file to retrieve. + required: true + schema: + type: string + - name: file_id + in: path + description: The ID of the file to retrieve. + required: true + schema: + type: string + post: + responses: + '200': + description: >- + A VectorStoreFileObject representing the updated file. + content: + application/json: + schema: + $ref: '#/components/schemas/VectorStoreFileObject' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - VectorIO + summary: Updates a vector store file. + description: Updates a vector store file. + parameters: + - name: vector_store_id + in: path + description: >- + The ID of the vector store containing the file to update. + required: true + schema: + type: string + - name: file_id + in: path + description: The ID of the file to update. + required: true + schema: + type: string + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/OpenaiUpdateVectorStoreFileRequest' + required: true + delete: + responses: + '200': + description: >- + A VectorStoreFileDeleteResponse indicating the deletion status. + content: + application/json: + schema: + $ref: '#/components/schemas/VectorStoreFileDeleteResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - VectorIO + summary: Delete a vector store file. + description: Delete a vector store file. + parameters: + - name: vector_store_id + in: path + description: >- + The ID of the vector store containing the file to delete. + required: true + schema: + type: string + - name: file_id + in: path + description: The ID of the file to delete. + required: true + schema: + type: string /v1/openai/v1/vector_stores/{vector_store_id}/files/{file_id}: get: responses: @@ -3381,6 +4220,41 @@ paths: required: true schema: type: string + /v1/embeddings: + post: + responses: + '200': + description: >- + An OpenAIEmbeddingsResponse containing the embeddings. 
+ content: + application/json: + schema: + $ref: '#/components/schemas/OpenAIEmbeddingsResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Inference + summary: >- + Generate OpenAI-compatible embeddings for the given input using the specified + model. + description: >- + Generate OpenAI-compatible embeddings for the given input using the specified + model. + parameters: [] + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/OpenaiEmbeddingsRequest' + required: true /v1/openai/v1/embeddings: post: responses: @@ -3416,6 +4290,127 @@ paths: schema: $ref: '#/components/schemas/OpenaiEmbeddingsRequest' required: true + /v1/files: + get: + responses: + '200': + description: >- + An ListOpenAIFileResponse containing the list of files. + content: + application/json: + schema: + $ref: '#/components/schemas/ListOpenAIFileResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Files + summary: >- + Returns a list of files that belong to the user's organization. + description: >- + Returns a list of files that belong to the user's organization. + parameters: + - name: after + in: query + description: >- + A cursor for use in pagination. `after` is an object ID that defines your + place in the list. For instance, if you make a list request and receive + 100 objects, ending with obj_foo, your subsequent call can include after=obj_foo + in order to fetch the next page of the list. + required: false + schema: + type: string + - name: limit + in: query + description: >- + A limit on the number of objects to be returned. Limit can range between + 1 and 10,000, and the default is 10,000. + required: false + schema: + type: integer + - name: order + in: query + description: >- + Sort order by the `created_at` timestamp of the objects. `asc` for ascending + order and `desc` for descending order. + required: false + schema: + $ref: '#/components/schemas/Order' + - name: purpose + in: query + description: >- + Only return files with the given purpose. + required: false + schema: + $ref: '#/components/schemas/OpenAIFilePurpose' + post: + responses: + '200': + description: >- + An OpenAIFileObject representing the uploaded file. + content: + application/json: + schema: + $ref: '#/components/schemas/OpenAIFileObject' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Files + summary: >- + Upload a file that can be used across various endpoints. + description: >- + Upload a file that can be used across various endpoints. + + The file upload should be a multipart form request with: + + - file: The File object (not file name) to be uploaded. + + - purpose: The intended purpose of the uploaded file. + + - expires_after: Optional form values describing expiration for the file. + Expected expires_after[anchor] = "created_at", expires_after[seconds] = {integer}. + Seconds must be between 3600 and 2592000 (1 hour to 30 days). 
+ parameters: [] + requestBody: + content: + multipart/form-data: + schema: + type: object + properties: + file: + type: string + format: binary + purpose: + $ref: '#/components/schemas/OpenAIFilePurpose' + expires_after_anchor: + oneOf: + - type: string + - type: 'null' + expires_after_seconds: + oneOf: + - type: integer + - type: 'null' + required: + - file + - purpose + - expires_after_anchor + - expires_after_seconds + required: true /v1/openai/v1/files: get: responses: @@ -3561,6 +4556,40 @@ paths: summary: List models using the OpenAI API. description: List models using the OpenAI API. parameters: [] + /v1/files/{file_id}/content: + get: + responses: + '200': + description: >- + The raw file content as a binary response. + content: + application/json: + schema: + $ref: '#/components/schemas/Response' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Files + summary: >- + Returns the contents of the specified file. + description: >- + Returns the contents of the specified file. + parameters: + - name: file_id + in: path + description: >- + The ID of the file to use for this request. + required: true + schema: + type: string /v1/openai/v1/files/{file_id}/content: get: responses: @@ -3595,6 +4624,46 @@ paths: required: true schema: type: string + /v1/vector_stores/{vector_store_id}/files/{file_id}/content: + get: + responses: + '200': + description: >- + A list of InterleavedContent representing the file contents. + content: + application/json: + schema: + $ref: '#/components/schemas/VectorStoreFileContentsResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - VectorIO + summary: >- + Retrieves the contents of a vector store file. + description: >- + Retrieves the contents of a vector store file. + parameters: + - name: vector_store_id + in: path + description: >- + The ID of the vector store containing the file to retrieve. + required: true + schema: + type: string + - name: file_id + in: path + description: The ID of the file to retrieve. + required: true + schema: + type: string /v1/openai/v1/vector_stores/{vector_store_id}/files/{file_id}/content: get: responses: @@ -3635,6 +4704,47 @@ paths: required: true schema: type: string + /v1/vector_stores/{vector_store_id}/search: + post: + responses: + '200': + description: >- + A VectorStoreSearchResponse containing the search results. + content: + application/json: + schema: + $ref: '#/components/schemas/VectorStoreSearchResponsePage' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - VectorIO + summary: Search for chunks in a vector store. + description: >- + Search for chunks in a vector store. + + Searches a vector store for relevant chunks based on a query and optional + file attribute filters. + parameters: + - name: vector_store_id + in: path + description: The ID of the vector store to search. 
+ required: true + schema: + type: string + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/OpenaiSearchVectorStoreRequest' + required: true /v1/openai/v1/vector_stores/{vector_store_id}/search: post: responses: @@ -4063,6 +5173,38 @@ paths: schema: $ref: '#/components/schemas/RunEvalRequest' required: true + /v1/moderations: + post: + responses: + '200': + description: A moderation object. + content: + application/json: + schema: + $ref: '#/components/schemas/ModerationObject' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Safety + summary: >- + Classifies if text and/or image inputs are potentially harmful. + description: >- + Classifies if text and/or image inputs are potentially harmful. + parameters: [] + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/RunModerationRequest' + required: true /v1/openai/v1/moderations: post: responses: diff --git a/llama_stack/apis/agents/agents.py b/llama_stack/apis/agents/agents.py index e53ca82e2..de420be5d 100644 --- a/llama_stack/apis/agents/agents.py +++ b/llama_stack/apis/agents/agents.py @@ -695,6 +695,7 @@ class Agents(Protocol): # Both of these APIs are inherently stateful. @webmethod(route="/openai/v1/responses/{response_id}", method="GET", level=LLAMA_STACK_API_V1) + @webmethod(route="/responses/{response_id}", method="GET", level=LLAMA_STACK_API_V1) async def get_openai_response( self, response_id: str, @@ -707,6 +708,7 @@ class Agents(Protocol): ... @webmethod(route="/openai/v1/responses", method="POST", level=LLAMA_STACK_API_V1) + @webmethod(route="/responses", method="POST", level=LLAMA_STACK_API_V1) async def create_openai_response( self, input: str | list[OpenAIResponseInput], @@ -732,6 +734,7 @@ class Agents(Protocol): ... @webmethod(route="/openai/v1/responses", method="GET", level=LLAMA_STACK_API_V1) + @webmethod(route="/responses", method="GET", level=LLAMA_STACK_API_V1) async def list_openai_responses( self, after: str | None = None, @@ -750,6 +753,7 @@ class Agents(Protocol): ... @webmethod(route="/openai/v1/responses/{response_id}/input_items", method="GET", level=LLAMA_STACK_API_V1) + @webmethod(route="/responses/{response_id}/input_items", method="GET", level=LLAMA_STACK_API_V1) async def list_openai_response_input_items( self, response_id: str, @@ -772,6 +776,7 @@ class Agents(Protocol): ... @webmethod(route="/openai/v1/responses/{response_id}", method="DELETE", level=LLAMA_STACK_API_V1) + @webmethod(route="/responses/{response_id}", method="DELETE", level=LLAMA_STACK_API_V1) async def delete_openai_response(self, response_id: str) -> OpenAIDeleteResponseObject: """Delete an OpenAI response by its ID. diff --git a/llama_stack/apis/batches/batches.py b/llama_stack/apis/batches/batches.py index 5890cbe04..1a64257e3 100644 --- a/llama_stack/apis/batches/batches.py +++ b/llama_stack/apis/batches/batches.py @@ -44,6 +44,7 @@ class Batches(Protocol): """ @webmethod(route="/openai/v1/batches", method="POST", level=LLAMA_STACK_API_V1) + @webmethod(route="/batches", method="POST", level=LLAMA_STACK_API_V1) async def create_batch( self, input_file_id: str, @@ -64,6 +65,7 @@ class Batches(Protocol): ... 
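# Usage sketch (not from this patch): with the unprefixed /batches route
# registered above, the stock openai-python client can target /v1 directly.
# The server URL, api_key, and file id below are illustrative assumptions.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8321/v1", api_key="fake")

batch = client.batches.create(
    input_file_id="file-abc123",  # placeholder id of a previously uploaded JSONL file
    endpoint="/v1/chat/completions",
    completion_window="24h",
)
print(batch.id, batch.status)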
@webmethod(route="/openai/v1/batches/{batch_id}", method="GET", level=LLAMA_STACK_API_V1) + @webmethod(route="/batches/{batch_id}", method="GET", level=LLAMA_STACK_API_V1) async def retrieve_batch(self, batch_id: str) -> BatchObject: """Retrieve information about a specific batch. @@ -73,6 +75,7 @@ class Batches(Protocol): ... @webmethod(route="/openai/v1/batches/{batch_id}/cancel", method="POST", level=LLAMA_STACK_API_V1) + @webmethod(route="/batches/{batch_id}/cancel", method="POST", level=LLAMA_STACK_API_V1) async def cancel_batch(self, batch_id: str) -> BatchObject: """Cancel a batch that is in progress. @@ -82,6 +85,7 @@ class Batches(Protocol): ... @webmethod(route="/openai/v1/batches", method="GET", level=LLAMA_STACK_API_V1) + @webmethod(route="/batches", method="GET", level=LLAMA_STACK_API_V1) async def list_batches( self, after: str | None = None, diff --git a/llama_stack/apis/files/files.py b/llama_stack/apis/files/files.py index 7e45b55ee..d5abb6286 100644 --- a/llama_stack/apis/files/files.py +++ b/llama_stack/apis/files/files.py @@ -106,6 +106,7 @@ class OpenAIFileDeleteResponse(BaseModel): class Files(Protocol): # OpenAI Files API Endpoints @webmethod(route="/openai/v1/files", method="POST", level=LLAMA_STACK_API_V1) + @webmethod(route="/files", method="POST", level=LLAMA_STACK_API_V1) async def openai_upload_file( self, file: Annotated[UploadFile, File()], @@ -129,6 +130,7 @@ class Files(Protocol): ... @webmethod(route="/openai/v1/files", method="GET", level=LLAMA_STACK_API_V1) + @webmethod(route="/files", method="GET", level=LLAMA_STACK_API_V1) async def openai_list_files( self, after: str | None = None, @@ -148,6 +150,7 @@ class Files(Protocol): ... @webmethod(route="/openai/v1/files/{file_id}", method="GET", level=LLAMA_STACK_API_V1) + @webmethod(route="/files/{file_id}", method="GET", level=LLAMA_STACK_API_V1) async def openai_retrieve_file( self, file_id: str, @@ -161,6 +164,7 @@ class Files(Protocol): ... @webmethod(route="/openai/v1/files/{file_id}", method="DELETE", level=LLAMA_STACK_API_V1) + @webmethod(route="/files/{file_id}", method="DELETE", level=LLAMA_STACK_API_V1) async def openai_delete_file( self, file_id: str, @@ -174,6 +178,7 @@ class Files(Protocol): ... @webmethod(route="/openai/v1/files/{file_id}/content", method="GET", level=LLAMA_STACK_API_V1) + @webmethod(route="/files/{file_id}/content", method="GET", level=LLAMA_STACK_API_V1) async def openai_retrieve_file_content( self, file_id: str, diff --git a/llama_stack/apis/inference/inference.py b/llama_stack/apis/inference/inference.py index 134da5bf8..29b014a11 100644 --- a/llama_stack/apis/inference/inference.py +++ b/llama_stack/apis/inference/inference.py @@ -1090,6 +1090,7 @@ class InferenceProvider(Protocol): return # this is so mypy's safe-super rule will consider the method concrete @webmethod(route="/openai/v1/completions", method="POST", level=LLAMA_STACK_API_V1) + @webmethod(route="/completions", method="POST", level=LLAMA_STACK_API_V1) async def openai_completion( self, # Standard OpenAI completion parameters @@ -1141,6 +1142,7 @@ class InferenceProvider(Protocol): ... @webmethod(route="/openai/v1/chat/completions", method="POST", level=LLAMA_STACK_API_V1) + @webmethod(route="/chat/completions", method="POST", level=LLAMA_STACK_API_V1) async def openai_chat_completion( self, model: str, @@ -1197,6 +1199,7 @@ class InferenceProvider(Protocol): ... 
@webmethod(route="/openai/v1/embeddings", method="POST", level=LLAMA_STACK_API_V1) + @webmethod(route="/embeddings", method="POST", level=LLAMA_STACK_API_V1) async def openai_embeddings( self, model: str, @@ -1226,6 +1229,7 @@ class Inference(InferenceProvider): """ @webmethod(route="/openai/v1/chat/completions", method="GET", level=LLAMA_STACK_API_V1) + @webmethod(route="/chat/completions", method="GET", level=LLAMA_STACK_API_V1) async def list_chat_completions( self, after: str | None = None, @@ -1244,6 +1248,7 @@ class Inference(InferenceProvider): raise NotImplementedError("List chat completions is not implemented") @webmethod(route="/openai/v1/chat/completions/{completion_id}", method="GET", level=LLAMA_STACK_API_V1) + @webmethod(route="/chat/completions/{completion_id}", method="GET", level=LLAMA_STACK_API_V1) async def get_chat_completion(self, completion_id: str) -> OpenAICompletionWithInputMessages: """Describe a chat completion by its ID. diff --git a/llama_stack/apis/safety/safety.py b/llama_stack/apis/safety/safety.py index 98367e9b0..d9ef6b2a1 100644 --- a/llama_stack/apis/safety/safety.py +++ b/llama_stack/apis/safety/safety.py @@ -115,6 +115,7 @@ class Safety(Protocol): ... @webmethod(route="/openai/v1/moderations", method="POST", level=LLAMA_STACK_API_V1) + @webmethod(route="/moderations", method="POST", level=LLAMA_STACK_API_V1) async def run_moderation(self, input: str | list[str], model: str) -> ModerationObject: """Classifies if text and/or image inputs are potentially harmful. :param input: Input (or inputs) to classify. diff --git a/llama_stack/apis/vector_io/vector_io.py b/llama_stack/apis/vector_io/vector_io.py index 2850863c4..dfd93e481 100644 --- a/llama_stack/apis/vector_io/vector_io.py +++ b/llama_stack/apis/vector_io/vector_io.py @@ -474,6 +474,7 @@ class VectorIO(Protocol): # OpenAI Vector Stores API endpoints @webmethod(route="/openai/v1/vector_stores", method="POST", level=LLAMA_STACK_API_V1) + @webmethod(route="/vector_stores", method="POST", level=LLAMA_STACK_API_V1) async def openai_create_vector_store( self, name: str | None = None, @@ -500,6 +501,7 @@ class VectorIO(Protocol): ... @webmethod(route="/openai/v1/vector_stores", method="GET", level=LLAMA_STACK_API_V1) + @webmethod(route="/vector_stores", method="GET", level=LLAMA_STACK_API_V1) async def openai_list_vector_stores( self, limit: int | None = 20, @@ -518,6 +520,7 @@ class VectorIO(Protocol): ... @webmethod(route="/openai/v1/vector_stores/{vector_store_id}", method="GET", level=LLAMA_STACK_API_V1) + @webmethod(route="/vector_stores/{vector_store_id}", method="GET", level=LLAMA_STACK_API_V1) async def openai_retrieve_vector_store( self, vector_store_id: str, @@ -530,6 +533,7 @@ class VectorIO(Protocol): ... @webmethod(route="/openai/v1/vector_stores/{vector_store_id}", method="POST", level=LLAMA_STACK_API_V1) + @webmethod(route="/vector_stores/{vector_store_id}", method="POST", level=LLAMA_STACK_API_V1) async def openai_update_vector_store( self, vector_store_id: str, @@ -548,6 +552,7 @@ class VectorIO(Protocol): ... @webmethod(route="/openai/v1/vector_stores/{vector_store_id}", method="DELETE", level=LLAMA_STACK_API_V1) + @webmethod(route="/vector_stores/{vector_store_id}", method="DELETE", level=LLAMA_STACK_API_V1) async def openai_delete_vector_store( self, vector_store_id: str, @@ -560,6 +565,7 @@ class VectorIO(Protocol): ... 
@webmethod(route="/openai/v1/vector_stores/{vector_store_id}/search", method="POST", level=LLAMA_STACK_API_V1) + @webmethod(route="/vector_stores/{vector_store_id}/search", method="POST", level=LLAMA_STACK_API_V1) async def openai_search_vector_store( self, vector_store_id: str, @@ -586,6 +592,7 @@ class VectorIO(Protocol): ... @webmethod(route="/openai/v1/vector_stores/{vector_store_id}/files", method="POST", level=LLAMA_STACK_API_V1) + @webmethod(route="/vector_stores/{vector_store_id}/files", method="POST", level=LLAMA_STACK_API_V1) async def openai_attach_file_to_vector_store( self, vector_store_id: str, @@ -604,6 +611,7 @@ class VectorIO(Protocol): ... @webmethod(route="/openai/v1/vector_stores/{vector_store_id}/files", method="GET", level=LLAMA_STACK_API_V1) + @webmethod(route="/vector_stores/{vector_store_id}/files", method="GET", level=LLAMA_STACK_API_V1) async def openai_list_files_in_vector_store( self, vector_store_id: str, @@ -628,6 +636,7 @@ class VectorIO(Protocol): @webmethod( route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}", method="GET", level=LLAMA_STACK_API_V1 ) + @webmethod(route="/vector_stores/{vector_store_id}/files/{file_id}", method="GET", level=LLAMA_STACK_API_V1) async def openai_retrieve_vector_store_file( self, vector_store_id: str, @@ -646,6 +655,11 @@ class VectorIO(Protocol): method="GET", level=LLAMA_STACK_API_V1, ) + @webmethod( + route="/vector_stores/{vector_store_id}/files/{file_id}/content", + method="GET", + level=LLAMA_STACK_API_V1, + ) async def openai_retrieve_vector_store_file_contents( self, vector_store_id: str, @@ -662,6 +676,7 @@ class VectorIO(Protocol): @webmethod( route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}", method="POST", level=LLAMA_STACK_API_V1 ) + @webmethod(route="/vector_stores/{vector_store_id}/files/{file_id}", method="POST", level=LLAMA_STACK_API_V1) async def openai_update_vector_store_file( self, vector_store_id: str, @@ -680,6 +695,7 @@ class VectorIO(Protocol): @webmethod( route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}", method="DELETE", level=LLAMA_STACK_API_V1 ) + @webmethod(route="/vector_stores/{vector_store_id}/files/{file_id}", method="DELETE", level=LLAMA_STACK_API_V1) async def openai_delete_vector_store_file( self, vector_store_id: str, diff --git a/tests/integration/fixtures/common.py b/tests/integration/fixtures/common.py index 835a701bc..68aa2b60b 100644 --- a/tests/integration/fixtures/common.py +++ b/tests/integration/fixtures/common.py @@ -274,7 +274,7 @@ def require_server(llama_stack_client): @pytest.fixture(scope="session") def openai_client(llama_stack_client, require_server): - base_url = f"{llama_stack_client.base_url}/v1/openai/v1" + base_url = f"{llama_stack_client.base_url}/v1" return OpenAI(base_url=base_url, api_key="fake") diff --git a/tests/integration/inference/test_openai_embeddings.py b/tests/integration/inference/test_openai_embeddings.py index 92064b651..84e92706a 100644 --- a/tests/integration/inference/test_openai_embeddings.py +++ b/tests/integration/inference/test_openai_embeddings.py @@ -87,7 +87,7 @@ def skip_if_model_doesnt_support_openai_embeddings(client, model_id): @pytest.fixture def openai_client(client_with_models): - base_url = f"{client_with_models.base_url}/v1/openai/v1" + base_url = f"{client_with_models.base_url}/v1" return OpenAI(base_url=base_url, api_key="fake") From 3a09f00cdb4d040b17b6c567537a702cf369ba61 Mon Sep 17 00:00:00 2001 From: Ashwin Bharambe Date: Mon, 29 Sep 2025 21:29:15 -0700 Subject: [PATCH 10/13] 
feat(files): fix expires_after API shape (#3604) This was just quite incorrect. See source here: https://platform.openai.com/docs/api-reference/files/create --- docs/openapi_generator/pyopenapi/generator.py | 14 +- docs/static/llama-stack-spec.html | 692 +++++++++++------- docs/static/llama-stack-spec.yaml | 377 ++++++---- llama_stack/apis/files/files.py | 7 +- .../providers/inline/files/localfs/files.py | 6 +- .../providers/remote/files/s3/files.py | 11 +- llama_stack/strong_typing/inspection.py | 16 + llama_stack/strong_typing/schema.py | 7 +- tests/unit/providers/files/test_s3_files.py | 23 +- 9 files changed, 705 insertions(+), 448 deletions(-) diff --git a/docs/openapi_generator/pyopenapi/generator.py b/docs/openapi_generator/pyopenapi/generator.py index 758fe7e8f..a38e02e7f 100644 --- a/docs/openapi_generator/pyopenapi/generator.py +++ b/docs/openapi_generator/pyopenapi/generator.py @@ -5,6 +5,7 @@ # the root directory of this source tree. import hashlib +import inspect import ipaddress import types import typing @@ -12,6 +13,7 @@ from dataclasses import make_dataclass from typing import Annotated, Any, Dict, get_args, get_origin, Set, Union from fastapi import UploadFile +from pydantic import BaseModel from llama_stack.apis.datatypes import Error from llama_stack.strong_typing.core import JsonType @@ -632,14 +634,22 @@ class Generator: base_type = get_args(param_type)[0] else: base_type = param_type + + # Check if the type is optional + is_optional = is_type_optional(base_type) + if is_optional: + base_type = unwrap_optional_type(base_type) + if base_type is UploadFile: # File upload properties[name] = {"type": "string", "format": "binary"} else: - # Form field + # All other types - generate schema reference + # This includes enums, BaseModels, and simple types properties[name] = self.schema_builder.classdef_to_ref(base_type) - required_fields.append(name) + if not is_optional: + required_fields.append(name) multipart_schema = { "type": "object", diff --git a/docs/static/llama-stack-spec.html b/docs/static/llama-stack-spec.html index 2072af745..616ebb4fc 100644 --- a/docs/static/llama-stack-spec.html +++ b/docs/static/llama-stack-spec.html @@ -6070,7 +6070,7 @@ "Files" ], "summary": "Upload a file that can be used across various endpoints.", - "description": "Upload a file that can be used across various endpoints.\nThe file upload should be a multipart form request with:\n- file: The File object (not file name) to be uploaded.\n- purpose: The intended purpose of the uploaded file.\n- expires_after: Optional form values describing expiration for the file. Expected expires_after[anchor] = \"created_at\", expires_after[seconds] = {integer}. 
Seconds must be between 3600 and 2592000 (1 hour to 30 days).", + "description": "Upload a file that can be used across various endpoints.\nThe file upload should be a multipart form request with:\n- file: The File object (not file name) to be uploaded.\n- purpose: The intended purpose of the uploaded file.\n- expires_after: Optional form values describing expiration for the file.", "parameters": [], "requestBody": { "content": { @@ -6085,32 +6085,13 @@ "purpose": { "$ref": "#/components/schemas/OpenAIFilePurpose" }, - "expires_after_anchor": { - "oneOf": [ - { - "type": "string" - }, - { - "type": "null" - } - ] - }, - "expires_after_seconds": { - "oneOf": [ - { - "type": "integer" - }, - { - "type": "null" - } - ] + "expires_after": { + "$ref": "#/components/schemas/ExpiresAfter" } }, "required": [ "file", - "purpose", - "expires_after_anchor", - "expires_after_seconds" + "purpose" ] } } @@ -6218,7 +6199,7 @@ "Files" ], "summary": "Upload a file that can be used across various endpoints.", - "description": "Upload a file that can be used across various endpoints.\nThe file upload should be a multipart form request with:\n- file: The File object (not file name) to be uploaded.\n- purpose: The intended purpose of the uploaded file.\n- expires_after: Optional form values describing expiration for the file. Expected expires_after[anchor] = \"created_at\", expires_after[seconds] = {integer}. Seconds must be between 3600 and 2592000 (1 hour to 30 days).", + "description": "Upload a file that can be used across various endpoints.\nThe file upload should be a multipart form request with:\n- file: The File object (not file name) to be uploaded.\n- purpose: The intended purpose of the uploaded file.\n- expires_after: Optional form values describing expiration for the file.", "parameters": [], "requestBody": { "content": { @@ -6233,32 +6214,13 @@ "purpose": { "$ref": "#/components/schemas/OpenAIFilePurpose" }, - "expires_after_anchor": { - "oneOf": [ - { - "type": "string" - }, - { - "type": "null" - } - ] - }, - "expires_after_seconds": { - "oneOf": [ - { - "type": "integer" - }, - { - "type": "null" - } - ] + "expires_after": { + "$ref": "#/components/schemas/ExpiresAfter" } }, "required": [ "file", - "purpose", - "expires_after_anchor", - "expires_after_seconds" + "purpose" ] } } @@ -7978,7 +7940,25 @@ "type": "object", "properties": { "strategy": { - "$ref": "#/components/schemas/SamplingStrategy", + "oneOf": [ + { + "$ref": "#/components/schemas/GreedySamplingStrategy" + }, + { + "$ref": "#/components/schemas/TopPSamplingStrategy" + }, + { + "$ref": "#/components/schemas/TopKSamplingStrategy" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "greedy": "#/components/schemas/GreedySamplingStrategy", + "top_p": "#/components/schemas/TopPSamplingStrategy", + "top_k": "#/components/schemas/TopKSamplingStrategy" + } + }, "description": "The sampling strategy." }, "max_tokens": { @@ -8006,27 +7986,6 @@ "title": "SamplingParams", "description": "Sampling parameters." 
}, - "SamplingStrategy": { - "oneOf": [ - { - "$ref": "#/components/schemas/GreedySamplingStrategy" - }, - { - "$ref": "#/components/schemas/TopPSamplingStrategy" - }, - { - "$ref": "#/components/schemas/TopKSamplingStrategy" - } - ], - "discriminator": { - "propertyName": "type", - "mapping": { - "greedy": "#/components/schemas/GreedySamplingStrategy", - "top_p": "#/components/schemas/TopPSamplingStrategy", - "top_k": "#/components/schemas/TopKSamplingStrategy" - } - } - }, "SystemMessage": { "type": "object", "properties": { @@ -8609,7 +8568,25 @@ "description": "Type of the event" }, "delta": { - "$ref": "#/components/schemas/ContentDelta", + "oneOf": [ + { + "$ref": "#/components/schemas/TextDelta" + }, + { + "$ref": "#/components/schemas/ImageDelta" + }, + { + "$ref": "#/components/schemas/ToolCallDelta" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "text": "#/components/schemas/TextDelta", + "image": "#/components/schemas/ImageDelta", + "tool_call": "#/components/schemas/ToolCallDelta" + } + }, "description": "Content generated since last event. This can be one or more tokens, or a tool call." }, "logprobs": { @@ -8659,27 +8636,6 @@ "title": "ChatCompletionResponseStreamChunk", "description": "A chunk of a streamed chat completion response." }, - "ContentDelta": { - "oneOf": [ - { - "$ref": "#/components/schemas/TextDelta" - }, - { - "$ref": "#/components/schemas/ImageDelta" - }, - { - "$ref": "#/components/schemas/ToolCallDelta" - } - ], - "discriminator": { - "propertyName": "type", - "mapping": { - "text": "#/components/schemas/TextDelta", - "image": "#/components/schemas/ImageDelta", - "tool_call": "#/components/schemas/ToolCallDelta" - } - } - }, "ImageDelta": { "type": "object", "properties": { @@ -9608,7 +9564,37 @@ "type": "object", "properties": { "payload": { - "$ref": "#/components/schemas/AgentTurnResponseEventPayload", + "oneOf": [ + { + "$ref": "#/components/schemas/AgentTurnResponseStepStartPayload" + }, + { + "$ref": "#/components/schemas/AgentTurnResponseStepProgressPayload" + }, + { + "$ref": "#/components/schemas/AgentTurnResponseStepCompletePayload" + }, + { + "$ref": "#/components/schemas/AgentTurnResponseTurnStartPayload" + }, + { + "$ref": "#/components/schemas/AgentTurnResponseTurnCompletePayload" + }, + { + "$ref": "#/components/schemas/AgentTurnResponseTurnAwaitingInputPayload" + } + ], + "discriminator": { + "propertyName": "event_type", + "mapping": { + "step_start": "#/components/schemas/AgentTurnResponseStepStartPayload", + "step_progress": "#/components/schemas/AgentTurnResponseStepProgressPayload", + "step_complete": "#/components/schemas/AgentTurnResponseStepCompletePayload", + "turn_start": "#/components/schemas/AgentTurnResponseTurnStartPayload", + "turn_complete": "#/components/schemas/AgentTurnResponseTurnCompletePayload", + "turn_awaiting_input": "#/components/schemas/AgentTurnResponseTurnAwaitingInputPayload" + } + }, "description": "Event-specific payload containing event data" } }, @@ -9619,39 +9605,6 @@ "title": "AgentTurnResponseEvent", "description": "An event in an agent turn response stream." 
}, - "AgentTurnResponseEventPayload": { - "oneOf": [ - { - "$ref": "#/components/schemas/AgentTurnResponseStepStartPayload" - }, - { - "$ref": "#/components/schemas/AgentTurnResponseStepProgressPayload" - }, - { - "$ref": "#/components/schemas/AgentTurnResponseStepCompletePayload" - }, - { - "$ref": "#/components/schemas/AgentTurnResponseTurnStartPayload" - }, - { - "$ref": "#/components/schemas/AgentTurnResponseTurnCompletePayload" - }, - { - "$ref": "#/components/schemas/AgentTurnResponseTurnAwaitingInputPayload" - } - ], - "discriminator": { - "propertyName": "event_type", - "mapping": { - "step_start": "#/components/schemas/AgentTurnResponseStepStartPayload", - "step_progress": "#/components/schemas/AgentTurnResponseStepProgressPayload", - "step_complete": "#/components/schemas/AgentTurnResponseStepCompletePayload", - "turn_start": "#/components/schemas/AgentTurnResponseTurnStartPayload", - "turn_complete": "#/components/schemas/AgentTurnResponseTurnCompletePayload", - "turn_awaiting_input": "#/components/schemas/AgentTurnResponseTurnAwaitingInputPayload" - } - } - }, "AgentTurnResponseStepCompletePayload": { "type": "object", "properties": { @@ -9752,7 +9705,25 @@ "description": "Unique identifier for the step within a turn" }, "delta": { - "$ref": "#/components/schemas/ContentDelta", + "oneOf": [ + { + "$ref": "#/components/schemas/TextDelta" + }, + { + "$ref": "#/components/schemas/ImageDelta" + }, + { + "$ref": "#/components/schemas/ToolCallDelta" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "text": "#/components/schemas/TextDelta", + "image": "#/components/schemas/ImageDelta", + "tool_call": "#/components/schemas/ToolCallDelta" + } + }, "description": "Incremental content changes during step execution" } }, @@ -11162,23 +11133,6 @@ "title": "OpenAIResponseOutputMessageMCPListTools", "description": "MCP list tools output message containing available tools from an MCP server." 
}, - "OpenAIResponseContentPart": { - "oneOf": [ - { - "$ref": "#/components/schemas/OpenAIResponseContentPartOutputText" - }, - { - "$ref": "#/components/schemas/OpenAIResponseContentPartRefusal" - } - ], - "discriminator": { - "propertyName": "type", - "mapping": { - "output_text": "#/components/schemas/OpenAIResponseContentPartOutputText", - "refusal": "#/components/schemas/OpenAIResponseContentPartRefusal" - } - } - }, "OpenAIResponseContentPartOutputText": { "type": "object", "properties": { @@ -11344,7 +11298,21 @@ "description": "Unique identifier of the output item containing this content part" }, "part": { - "$ref": "#/components/schemas/OpenAIResponseContentPart", + "oneOf": [ + { + "$ref": "#/components/schemas/OpenAIResponseContentPartOutputText" + }, + { + "$ref": "#/components/schemas/OpenAIResponseContentPartRefusal" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "output_text": "#/components/schemas/OpenAIResponseContentPartOutputText", + "refusal": "#/components/schemas/OpenAIResponseContentPartRefusal" + } + }, "description": "The content part that was added" }, "sequence_number": { @@ -11381,7 +11349,21 @@ "description": "Unique identifier of the output item containing this content part" }, "part": { - "$ref": "#/components/schemas/OpenAIResponseContentPart", + "oneOf": [ + { + "$ref": "#/components/schemas/OpenAIResponseContentPartOutputText" + }, + { + "$ref": "#/components/schemas/OpenAIResponseContentPartRefusal" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "output_text": "#/components/schemas/OpenAIResponseContentPartOutputText", + "refusal": "#/components/schemas/OpenAIResponseContentPartRefusal" + } + }, "description": "The completed content part" }, "sequence_number": { @@ -11705,7 +11687,37 @@ "description": "Unique identifier of the response containing this output" }, "item": { - "$ref": "#/components/schemas/OpenAIResponseOutput", + "oneOf": [ + { + "$ref": "#/components/schemas/OpenAIResponseMessage" + }, + { + "$ref": "#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall" + }, + { + "$ref": "#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall" + }, + { + "$ref": "#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall" + }, + { + "$ref": "#/components/schemas/OpenAIResponseOutputMessageMCPCall" + }, + { + "$ref": "#/components/schemas/OpenAIResponseOutputMessageMCPListTools" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "message": "#/components/schemas/OpenAIResponseMessage", + "web_search_call": "#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall", + "file_search_call": "#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall", + "function_call": "#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall", + "mcp_call": "#/components/schemas/OpenAIResponseOutputMessageMCPCall", + "mcp_list_tools": "#/components/schemas/OpenAIResponseOutputMessageMCPListTools" + } + }, "description": "The output item that was added (message, tool call, etc.)" }, "output_index": { @@ -11742,7 +11754,37 @@ "description": "Unique identifier of the response containing this output" }, "item": { - "$ref": "#/components/schemas/OpenAIResponseOutput", + "oneOf": [ + { + "$ref": "#/components/schemas/OpenAIResponseMessage" + }, + { + "$ref": "#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall" + }, + { + "$ref": "#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall" + }, + { + "$ref": 
"#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall" + }, + { + "$ref": "#/components/schemas/OpenAIResponseOutputMessageMCPCall" + }, + { + "$ref": "#/components/schemas/OpenAIResponseOutputMessageMCPListTools" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "message": "#/components/schemas/OpenAIResponseMessage", + "web_search_call": "#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall", + "file_search_call": "#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall", + "function_call": "#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall", + "mcp_call": "#/components/schemas/OpenAIResponseOutputMessageMCPCall", + "mcp_list_tools": "#/components/schemas/OpenAIResponseOutputMessageMCPListTools" + } + }, "description": "The completed output item (message, tool call, etc.)" }, "output_index": { @@ -12095,7 +12137,21 @@ "type": "object", "properties": { "eval_candidate": { - "$ref": "#/components/schemas/EvalCandidate", + "oneOf": [ + { + "$ref": "#/components/schemas/ModelCandidate" + }, + { + "$ref": "#/components/schemas/AgentCandidate" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "model": "#/components/schemas/ModelCandidate", + "agent": "#/components/schemas/AgentCandidate" + } + }, "description": "The candidate to evaluate." }, "scoring_params": { @@ -12118,23 +12174,6 @@ "title": "BenchmarkConfig", "description": "A benchmark configuration for evaluation." }, - "EvalCandidate": { - "oneOf": [ - { - "$ref": "#/components/schemas/ModelCandidate" - }, - { - "$ref": "#/components/schemas/AgentCandidate" - } - ], - "discriminator": { - "propertyName": "type", - "mapping": { - "model": "#/components/schemas/ModelCandidate", - "agent": "#/components/schemas/AgentCandidate" - } - } - }, "LLMAsJudgeScoringFnParams": { "type": "object", "properties": { @@ -12770,7 +12809,33 @@ "type": "object", "properties": { "message": { - "$ref": "#/components/schemas/OpenAIMessageParam", + "oneOf": [ + { + "$ref": "#/components/schemas/OpenAIUserMessageParam" + }, + { + "$ref": "#/components/schemas/OpenAISystemMessageParam" + }, + { + "$ref": "#/components/schemas/OpenAIAssistantMessageParam" + }, + { + "$ref": "#/components/schemas/OpenAIToolMessageParam" + }, + { + "$ref": "#/components/schemas/OpenAIDeveloperMessageParam" + } + ], + "discriminator": { + "propertyName": "role", + "mapping": { + "user": "#/components/schemas/OpenAIUserMessageParam", + "system": "#/components/schemas/OpenAISystemMessageParam", + "assistant": "#/components/schemas/OpenAIAssistantMessageParam", + "tool": "#/components/schemas/OpenAIToolMessageParam", + "developer": "#/components/schemas/OpenAIDeveloperMessageParam" + } + }, "description": "The message from the model" }, "finish_reason": { @@ -13146,23 +13211,6 @@ ], "title": "OpenAICompletionWithInputMessages" }, - "DataSource": { - "oneOf": [ - { - "$ref": "#/components/schemas/URIDataSource" - }, - { - "$ref": "#/components/schemas/RowsDataSource" - } - ], - "discriminator": { - "propertyName": "type", - "mapping": { - "uri": "#/components/schemas/URIDataSource", - "rows": "#/components/schemas/RowsDataSource" - } - } - }, "Dataset": { "type": "object", "properties": { @@ -13202,7 +13250,21 @@ "description": "Purpose of the dataset indicating its intended use" }, "source": { - "$ref": "#/components/schemas/DataSource", + "oneOf": [ + { + "$ref": "#/components/schemas/URIDataSource" + }, + { + "$ref": "#/components/schemas/RowsDataSource" + } + ], + "discriminator": { + 
"propertyName": "type", + "mapping": { + "uri": "#/components/schemas/URIDataSource", + "rows": "#/components/schemas/RowsDataSource" + } + }, "description": "Data source configuration for the dataset" }, "metadata": { @@ -13531,55 +13593,6 @@ "title": "ObjectType", "description": "Parameter type for object values." }, - "ParamType": { - "oneOf": [ - { - "$ref": "#/components/schemas/StringType" - }, - { - "$ref": "#/components/schemas/NumberType" - }, - { - "$ref": "#/components/schemas/BooleanType" - }, - { - "$ref": "#/components/schemas/ArrayType" - }, - { - "$ref": "#/components/schemas/ObjectType" - }, - { - "$ref": "#/components/schemas/JsonType" - }, - { - "$ref": "#/components/schemas/UnionType" - }, - { - "$ref": "#/components/schemas/ChatCompletionInputType" - }, - { - "$ref": "#/components/schemas/CompletionInputType" - }, - { - "$ref": "#/components/schemas/AgentTurnInputType" - } - ], - "discriminator": { - "propertyName": "type", - "mapping": { - "string": "#/components/schemas/StringType", - "number": "#/components/schemas/NumberType", - "boolean": "#/components/schemas/BooleanType", - "array": "#/components/schemas/ArrayType", - "object": "#/components/schemas/ObjectType", - "json": "#/components/schemas/JsonType", - "union": "#/components/schemas/UnionType", - "chat_completion_input": "#/components/schemas/ChatCompletionInputType", - "completion_input": "#/components/schemas/CompletionInputType", - "agent_turn_input": "#/components/schemas/AgentTurnInputType" - } - } - }, "ScoringFn": { "type": "object", "properties": { @@ -13638,7 +13651,53 @@ } }, "return_type": { - "$ref": "#/components/schemas/ParamType" + "oneOf": [ + { + "$ref": "#/components/schemas/StringType" + }, + { + "$ref": "#/components/schemas/NumberType" + }, + { + "$ref": "#/components/schemas/BooleanType" + }, + { + "$ref": "#/components/schemas/ArrayType" + }, + { + "$ref": "#/components/schemas/ObjectType" + }, + { + "$ref": "#/components/schemas/JsonType" + }, + { + "$ref": "#/components/schemas/UnionType" + }, + { + "$ref": "#/components/schemas/ChatCompletionInputType" + }, + { + "$ref": "#/components/schemas/CompletionInputType" + }, + { + "$ref": "#/components/schemas/AgentTurnInputType" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "string": "#/components/schemas/StringType", + "number": "#/components/schemas/NumberType", + "boolean": "#/components/schemas/BooleanType", + "array": "#/components/schemas/ArrayType", + "object": "#/components/schemas/ObjectType", + "json": "#/components/schemas/JsonType", + "union": "#/components/schemas/UnionType", + "chat_completion_input": "#/components/schemas/ChatCompletionInputType", + "completion_input": "#/components/schemas/CompletionInputType", + "agent_turn_input": "#/components/schemas/AgentTurnInputType" + } + } }, "params": { "$ref": "#/components/schemas/ScoringFnParams" @@ -15548,7 +15607,21 @@ "description": "Event type identifier set to STRUCTURED_LOG" }, "payload": { - "$ref": "#/components/schemas/StructuredLogPayload", + "oneOf": [ + { + "$ref": "#/components/schemas/SpanStartPayload" + }, + { + "$ref": "#/components/schemas/SpanEndPayload" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "span_start": "#/components/schemas/SpanStartPayload", + "span_end": "#/components/schemas/SpanEndPayload" + } + }, "description": "The structured payload data for the log event" } }, @@ -15563,23 +15636,6 @@ "title": "StructuredLogEvent", "description": "A structured log event containing typed payload data." 
}, - "StructuredLogPayload": { - "oneOf": [ - { - "$ref": "#/components/schemas/SpanStartPayload" - }, - { - "$ref": "#/components/schemas/SpanEndPayload" - } - ], - "discriminator": { - "propertyName": "type", - "mapping": { - "span_start": "#/components/schemas/SpanStartPayload", - "span_end": "#/components/schemas/SpanEndPayload" - } - } - }, "StructuredLogType": { "type": "string", "enum": [ @@ -15864,7 +15920,21 @@ "description": "Key-value attributes associated with the file" }, "chunking_strategy": { - "$ref": "#/components/schemas/VectorStoreChunkingStrategy", + "oneOf": [ + { + "$ref": "#/components/schemas/VectorStoreChunkingStrategyAuto" + }, + { + "$ref": "#/components/schemas/VectorStoreChunkingStrategyStatic" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "auto": "#/components/schemas/VectorStoreChunkingStrategyAuto", + "static": "#/components/schemas/VectorStoreChunkingStrategyStatic" + } + }, "description": "Strategy used for splitting the file into chunks" }, "created_at": { @@ -17677,6 +17747,25 @@ ], "title": "OpenaiUpdateVectorStoreFileRequest" }, + "ExpiresAfter": { + "type": "object", + "properties": { + "anchor": { + "type": "string", + "const": "created_at" + }, + "seconds": { + "type": "integer" + } + }, + "additionalProperties": false, + "required": [ + "anchor", + "seconds" + ], + "title": "ExpiresAfter", + "description": "Control expiration of uploaded files.\nParams:\n - anchor, must be \"created_at\"\n - seconds, must be int between 3600 and 2592000 (1 hour to 30 days)" + }, "DPOAlignmentConfig": { "type": "object", "properties": { @@ -18028,7 +18117,21 @@ "type": "object", "properties": { "query_generator_config": { - "$ref": "#/components/schemas/RAGQueryGeneratorConfig", + "oneOf": [ + { + "$ref": "#/components/schemas/DefaultRAGQueryGeneratorConfig" + }, + { + "$ref": "#/components/schemas/LLMRAGQueryGeneratorConfig" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "default": "#/components/schemas/DefaultRAGQueryGeneratorConfig", + "llm": "#/components/schemas/LLMRAGQueryGeneratorConfig" + } + }, "description": "Configuration for the query generator." }, "max_tokens_in_context": { @@ -18066,23 +18169,6 @@ "title": "RAGQueryConfig", "description": "Configuration for the RAG query generation." 
}, - "RAGQueryGeneratorConfig": { - "oneOf": [ - { - "$ref": "#/components/schemas/DefaultRAGQueryGeneratorConfig" - }, - { - "$ref": "#/components/schemas/LLMRAGQueryGeneratorConfig" - } - ], - "discriminator": { - "propertyName": "type", - "mapping": { - "default": "#/components/schemas/DefaultRAGQueryGeneratorConfig", - "llm": "#/components/schemas/LLMRAGQueryGeneratorConfig" - } - } - }, "RAGSearchMode": { "type": "string", "enum": [ @@ -18664,6 +18750,23 @@ ], "title": "RegisterBenchmarkRequest" }, + "DataSource": { + "oneOf": [ + { + "$ref": "#/components/schemas/URIDataSource" + }, + { + "$ref": "#/components/schemas/RowsDataSource" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "uri": "#/components/schemas/URIDataSource", + "rows": "#/components/schemas/RowsDataSource" + } + } + }, "RegisterDatasetRequest": { "type": "object", "properties": { @@ -18770,6 +18873,55 @@ ], "title": "RegisterModelRequest" }, + "ParamType": { + "oneOf": [ + { + "$ref": "#/components/schemas/StringType" + }, + { + "$ref": "#/components/schemas/NumberType" + }, + { + "$ref": "#/components/schemas/BooleanType" + }, + { + "$ref": "#/components/schemas/ArrayType" + }, + { + "$ref": "#/components/schemas/ObjectType" + }, + { + "$ref": "#/components/schemas/JsonType" + }, + { + "$ref": "#/components/schemas/UnionType" + }, + { + "$ref": "#/components/schemas/ChatCompletionInputType" + }, + { + "$ref": "#/components/schemas/CompletionInputType" + }, + { + "$ref": "#/components/schemas/AgentTurnInputType" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "string": "#/components/schemas/StringType", + "number": "#/components/schemas/NumberType", + "boolean": "#/components/schemas/BooleanType", + "array": "#/components/schemas/ArrayType", + "object": "#/components/schemas/ObjectType", + "json": "#/components/schemas/JsonType", + "union": "#/components/schemas/UnionType", + "chat_completion_input": "#/components/schemas/ChatCompletionInputType", + "completion_input": "#/components/schemas/CompletionInputType", + "agent_turn_input": "#/components/schemas/AgentTurnInputType" + } + } + }, "RegisterScoringFunctionRequest": { "type": "object", "properties": { diff --git a/docs/static/llama-stack-spec.yaml b/docs/static/llama-stack-spec.yaml index 7b51116ba..fe86b0ff0 100644 --- a/docs/static/llama-stack-spec.yaml +++ b/docs/static/llama-stack-spec.yaml @@ -4383,8 +4383,6 @@ paths: - purpose: The intended purpose of the uploaded file. - expires_after: Optional form values describing expiration for the file. - Expected expires_after[anchor] = "created_at", expires_after[seconds] = {integer}. - Seconds must be between 3600 and 2592000 (1 hour to 30 days). parameters: [] requestBody: content: @@ -4397,19 +4395,11 @@ paths: format: binary purpose: $ref: '#/components/schemas/OpenAIFilePurpose' - expires_after_anchor: - oneOf: - - type: string - - type: 'null' - expires_after_seconds: - oneOf: - - type: integer - - type: 'null' + expires_after: + $ref: '#/components/schemas/ExpiresAfter' required: - file - purpose - - expires_after_anchor - - expires_after_seconds required: true /v1/openai/v1/files: get: @@ -4504,8 +4494,6 @@ paths: - purpose: The intended purpose of the uploaded file. - expires_after: Optional form values describing expiration for the file. - Expected expires_after[anchor] = "created_at", expires_after[seconds] = {integer}. - Seconds must be between 3600 and 2592000 (1 hour to 30 days). 
parameters: [] requestBody: content: @@ -4518,19 +4506,11 @@ paths: format: binary purpose: $ref: '#/components/schemas/OpenAIFilePurpose' - expires_after_anchor: - oneOf: - - type: string - - type: 'null' - expires_after_seconds: - oneOf: - - type: integer - - type: 'null' + expires_after: + $ref: '#/components/schemas/ExpiresAfter' required: - file - purpose - - expires_after_anchor - - expires_after_seconds required: true /v1/openai/v1/models: get: @@ -5763,7 +5743,16 @@ components: type: object properties: strategy: - $ref: '#/components/schemas/SamplingStrategy' + oneOf: + - $ref: '#/components/schemas/GreedySamplingStrategy' + - $ref: '#/components/schemas/TopPSamplingStrategy' + - $ref: '#/components/schemas/TopKSamplingStrategy' + discriminator: + propertyName: type + mapping: + greedy: '#/components/schemas/GreedySamplingStrategy' + top_p: '#/components/schemas/TopPSamplingStrategy' + top_k: '#/components/schemas/TopKSamplingStrategy' description: The sampling strategy. max_tokens: type: integer @@ -5791,17 +5780,6 @@ components: - strategy title: SamplingParams description: Sampling parameters. - SamplingStrategy: - oneOf: - - $ref: '#/components/schemas/GreedySamplingStrategy' - - $ref: '#/components/schemas/TopPSamplingStrategy' - - $ref: '#/components/schemas/TopKSamplingStrategy' - discriminator: - propertyName: type - mapping: - greedy: '#/components/schemas/GreedySamplingStrategy' - top_p: '#/components/schemas/TopPSamplingStrategy' - top_k: '#/components/schemas/TopKSamplingStrategy' SystemMessage: type: object properties: @@ -6248,7 +6226,16 @@ components: - progress description: Type of the event delta: - $ref: '#/components/schemas/ContentDelta' + oneOf: + - $ref: '#/components/schemas/TextDelta' + - $ref: '#/components/schemas/ImageDelta' + - $ref: '#/components/schemas/ToolCallDelta' + discriminator: + propertyName: type + mapping: + text: '#/components/schemas/TextDelta' + image: '#/components/schemas/ImageDelta' + tool_call: '#/components/schemas/ToolCallDelta' description: >- Content generated since last event. This can be one or more tokens, or a tool call. @@ -6291,17 +6278,6 @@ components: title: ChatCompletionResponseStreamChunk description: >- A chunk of a streamed chat completion response. 
- ContentDelta: - oneOf: - - $ref: '#/components/schemas/TextDelta' - - $ref: '#/components/schemas/ImageDelta' - - $ref: '#/components/schemas/ToolCallDelta' - discriminator: - propertyName: type - mapping: - text: '#/components/schemas/TextDelta' - image: '#/components/schemas/ImageDelta' - tool_call: '#/components/schemas/ToolCallDelta' ImageDelta: type: object properties: @@ -6983,7 +6959,22 @@ components: type: object properties: payload: - $ref: '#/components/schemas/AgentTurnResponseEventPayload' + oneOf: + - $ref: '#/components/schemas/AgentTurnResponseStepStartPayload' + - $ref: '#/components/schemas/AgentTurnResponseStepProgressPayload' + - $ref: '#/components/schemas/AgentTurnResponseStepCompletePayload' + - $ref: '#/components/schemas/AgentTurnResponseTurnStartPayload' + - $ref: '#/components/schemas/AgentTurnResponseTurnCompletePayload' + - $ref: '#/components/schemas/AgentTurnResponseTurnAwaitingInputPayload' + discriminator: + propertyName: event_type + mapping: + step_start: '#/components/schemas/AgentTurnResponseStepStartPayload' + step_progress: '#/components/schemas/AgentTurnResponseStepProgressPayload' + step_complete: '#/components/schemas/AgentTurnResponseStepCompletePayload' + turn_start: '#/components/schemas/AgentTurnResponseTurnStartPayload' + turn_complete: '#/components/schemas/AgentTurnResponseTurnCompletePayload' + turn_awaiting_input: '#/components/schemas/AgentTurnResponseTurnAwaitingInputPayload' description: >- Event-specific payload containing event data additionalProperties: false @@ -6992,23 +6983,6 @@ components: title: AgentTurnResponseEvent description: >- An event in an agent turn response stream. - AgentTurnResponseEventPayload: - oneOf: - - $ref: '#/components/schemas/AgentTurnResponseStepStartPayload' - - $ref: '#/components/schemas/AgentTurnResponseStepProgressPayload' - - $ref: '#/components/schemas/AgentTurnResponseStepCompletePayload' - - $ref: '#/components/schemas/AgentTurnResponseTurnStartPayload' - - $ref: '#/components/schemas/AgentTurnResponseTurnCompletePayload' - - $ref: '#/components/schemas/AgentTurnResponseTurnAwaitingInputPayload' - discriminator: - propertyName: event_type - mapping: - step_start: '#/components/schemas/AgentTurnResponseStepStartPayload' - step_progress: '#/components/schemas/AgentTurnResponseStepProgressPayload' - step_complete: '#/components/schemas/AgentTurnResponseStepCompletePayload' - turn_start: '#/components/schemas/AgentTurnResponseTurnStartPayload' - turn_complete: '#/components/schemas/AgentTurnResponseTurnCompletePayload' - turn_awaiting_input: '#/components/schemas/AgentTurnResponseTurnAwaitingInputPayload' AgentTurnResponseStepCompletePayload: type: object properties: @@ -7087,7 +7061,16 @@ components: description: >- Unique identifier for the step within a turn delta: - $ref: '#/components/schemas/ContentDelta' + oneOf: + - $ref: '#/components/schemas/TextDelta' + - $ref: '#/components/schemas/ImageDelta' + - $ref: '#/components/schemas/ToolCallDelta' + discriminator: + propertyName: type + mapping: + text: '#/components/schemas/TextDelta' + image: '#/components/schemas/ImageDelta' + tool_call: '#/components/schemas/ToolCallDelta' description: >- Incremental content changes during step execution additionalProperties: false @@ -8156,15 +8139,6 @@ components: title: OpenAIResponseOutputMessageMCPListTools description: >- MCP list tools output message containing available tools from an MCP server. 
- OpenAIResponseContentPart: - oneOf: - - $ref: '#/components/schemas/OpenAIResponseContentPartOutputText' - - $ref: '#/components/schemas/OpenAIResponseContentPartRefusal' - discriminator: - propertyName: type - mapping: - output_text: '#/components/schemas/OpenAIResponseContentPartOutputText' - refusal: '#/components/schemas/OpenAIResponseContentPartRefusal' OpenAIResponseContentPartOutputText: type: object properties: @@ -8272,7 +8246,14 @@ components: description: >- Unique identifier of the output item containing this content part part: - $ref: '#/components/schemas/OpenAIResponseContentPart' + oneOf: + - $ref: '#/components/schemas/OpenAIResponseContentPartOutputText' + - $ref: '#/components/schemas/OpenAIResponseContentPartRefusal' + discriminator: + propertyName: type + mapping: + output_text: '#/components/schemas/OpenAIResponseContentPartOutputText' + refusal: '#/components/schemas/OpenAIResponseContentPartRefusal' description: The content part that was added sequence_number: type: integer @@ -8307,7 +8288,14 @@ components: description: >- Unique identifier of the output item containing this content part part: - $ref: '#/components/schemas/OpenAIResponseContentPart' + oneOf: + - $ref: '#/components/schemas/OpenAIResponseContentPartOutputText' + - $ref: '#/components/schemas/OpenAIResponseContentPartRefusal' + discriminator: + propertyName: type + mapping: + output_text: '#/components/schemas/OpenAIResponseContentPartOutputText' + refusal: '#/components/schemas/OpenAIResponseContentPartRefusal' description: The completed content part sequence_number: type: integer @@ -8593,7 +8581,22 @@ components: description: >- Unique identifier of the response containing this output item: - $ref: '#/components/schemas/OpenAIResponseOutput' + oneOf: + - $ref: '#/components/schemas/OpenAIResponseMessage' + - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + discriminator: + propertyName: type + mapping: + message: '#/components/schemas/OpenAIResponseMessage' + web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' description: >- The output item that was added (message, tool call, etc.) 
output_index: @@ -8629,7 +8632,22 @@ components: description: >- Unique identifier of the response containing this output item: - $ref: '#/components/schemas/OpenAIResponseOutput' + oneOf: + - $ref: '#/components/schemas/OpenAIResponseMessage' + - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + discriminator: + propertyName: type + mapping: + message: '#/components/schemas/OpenAIResponseMessage' + web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' description: >- The completed output item (message, tool call, etc.) output_index: @@ -8952,7 +8970,14 @@ components: type: object properties: eval_candidate: - $ref: '#/components/schemas/EvalCandidate' + oneOf: + - $ref: '#/components/schemas/ModelCandidate' + - $ref: '#/components/schemas/AgentCandidate' + discriminator: + propertyName: type + mapping: + model: '#/components/schemas/ModelCandidate' + agent: '#/components/schemas/AgentCandidate' description: The candidate to evaluate. scoring_params: type: object @@ -8973,15 +8998,6 @@ components: title: BenchmarkConfig description: >- A benchmark configuration for evaluation. - EvalCandidate: - oneOf: - - $ref: '#/components/schemas/ModelCandidate' - - $ref: '#/components/schemas/AgentCandidate' - discriminator: - propertyName: type - mapping: - model: '#/components/schemas/ModelCandidate' - agent: '#/components/schemas/AgentCandidate' LLMAsJudgeScoringFnParams: type: object properties: @@ -9445,7 +9461,20 @@ components: type: object properties: message: - $ref: '#/components/schemas/OpenAIMessageParam' + oneOf: + - $ref: '#/components/schemas/OpenAIUserMessageParam' + - $ref: '#/components/schemas/OpenAISystemMessageParam' + - $ref: '#/components/schemas/OpenAIAssistantMessageParam' + - $ref: '#/components/schemas/OpenAIToolMessageParam' + - $ref: '#/components/schemas/OpenAIDeveloperMessageParam' + discriminator: + propertyName: role + mapping: + user: '#/components/schemas/OpenAIUserMessageParam' + system: '#/components/schemas/OpenAISystemMessageParam' + assistant: '#/components/schemas/OpenAIAssistantMessageParam' + tool: '#/components/schemas/OpenAIToolMessageParam' + developer: '#/components/schemas/OpenAIDeveloperMessageParam' description: The message from the model finish_reason: type: string @@ -9738,15 +9767,6 @@ components: - model - input_messages title: OpenAICompletionWithInputMessages - DataSource: - oneOf: - - $ref: '#/components/schemas/URIDataSource' - - $ref: '#/components/schemas/RowsDataSource' - discriminator: - propertyName: type - mapping: - uri: '#/components/schemas/URIDataSource' - rows: '#/components/schemas/RowsDataSource' Dataset: type: object properties: @@ -9781,7 +9801,14 @@ components: description: >- Purpose of the dataset indicating its intended use source: - $ref: '#/components/schemas/DataSource' + oneOf: + - $ref: '#/components/schemas/URIDataSource' + - $ref: 
'#/components/schemas/RowsDataSource' + discriminator: + propertyName: type + mapping: + uri: '#/components/schemas/URIDataSource' + rows: '#/components/schemas/RowsDataSource' description: >- Data source configuration for the dataset metadata: @@ -10027,31 +10054,6 @@ components: - type title: ObjectType description: Parameter type for object values. - ParamType: - oneOf: - - $ref: '#/components/schemas/StringType' - - $ref: '#/components/schemas/NumberType' - - $ref: '#/components/schemas/BooleanType' - - $ref: '#/components/schemas/ArrayType' - - $ref: '#/components/schemas/ObjectType' - - $ref: '#/components/schemas/JsonType' - - $ref: '#/components/schemas/UnionType' - - $ref: '#/components/schemas/ChatCompletionInputType' - - $ref: '#/components/schemas/CompletionInputType' - - $ref: '#/components/schemas/AgentTurnInputType' - discriminator: - propertyName: type - mapping: - string: '#/components/schemas/StringType' - number: '#/components/schemas/NumberType' - boolean: '#/components/schemas/BooleanType' - array: '#/components/schemas/ArrayType' - object: '#/components/schemas/ObjectType' - json: '#/components/schemas/JsonType' - union: '#/components/schemas/UnionType' - chat_completion_input: '#/components/schemas/ChatCompletionInputType' - completion_input: '#/components/schemas/CompletionInputType' - agent_turn_input: '#/components/schemas/AgentTurnInputType' ScoringFn: type: object properties: @@ -10090,7 +10092,30 @@ components: - type: array - type: object return_type: - $ref: '#/components/schemas/ParamType' + oneOf: + - $ref: '#/components/schemas/StringType' + - $ref: '#/components/schemas/NumberType' + - $ref: '#/components/schemas/BooleanType' + - $ref: '#/components/schemas/ArrayType' + - $ref: '#/components/schemas/ObjectType' + - $ref: '#/components/schemas/JsonType' + - $ref: '#/components/schemas/UnionType' + - $ref: '#/components/schemas/ChatCompletionInputType' + - $ref: '#/components/schemas/CompletionInputType' + - $ref: '#/components/schemas/AgentTurnInputType' + discriminator: + propertyName: type + mapping: + string: '#/components/schemas/StringType' + number: '#/components/schemas/NumberType' + boolean: '#/components/schemas/BooleanType' + array: '#/components/schemas/ArrayType' + object: '#/components/schemas/ObjectType' + json: '#/components/schemas/JsonType' + union: '#/components/schemas/UnionType' + chat_completion_input: '#/components/schemas/ChatCompletionInputType' + completion_input: '#/components/schemas/CompletionInputType' + agent_turn_input: '#/components/schemas/AgentTurnInputType' params: $ref: '#/components/schemas/ScoringFnParams' additionalProperties: false @@ -11542,7 +11567,14 @@ components: description: >- Event type identifier set to STRUCTURED_LOG payload: - $ref: '#/components/schemas/StructuredLogPayload' + oneOf: + - $ref: '#/components/schemas/SpanStartPayload' + - $ref: '#/components/schemas/SpanEndPayload' + discriminator: + propertyName: type + mapping: + span_start: '#/components/schemas/SpanStartPayload' + span_end: '#/components/schemas/SpanEndPayload' description: >- The structured payload data for the log event additionalProperties: false @@ -11555,15 +11587,6 @@ components: title: StructuredLogEvent description: >- A structured log event containing typed payload data. 
- StructuredLogPayload: - oneOf: - - $ref: '#/components/schemas/SpanStartPayload' - - $ref: '#/components/schemas/SpanEndPayload' - discriminator: - propertyName: type - mapping: - span_start: '#/components/schemas/SpanStartPayload' - span_end: '#/components/schemas/SpanEndPayload' StructuredLogType: type: string enum: @@ -11772,7 +11795,14 @@ components: description: >- Key-value attributes associated with the file chunking_strategy: - $ref: '#/components/schemas/VectorStoreChunkingStrategy' + oneOf: + - $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto' + - $ref: '#/components/schemas/VectorStoreChunkingStrategyStatic' + discriminator: + propertyName: type + mapping: + auto: '#/components/schemas/VectorStoreChunkingStrategyAuto' + static: '#/components/schemas/VectorStoreChunkingStrategyStatic' description: >- Strategy used for splitting the file into chunks created_at: @@ -13084,6 +13114,25 @@ components: required: - attributes title: OpenaiUpdateVectorStoreFileRequest + ExpiresAfter: + type: object + properties: + anchor: + type: string + const: created_at + seconds: + type: integer + additionalProperties: false + required: + - anchor + - seconds + title: ExpiresAfter + description: >- + Control expiration of uploaded files. + + Params: + - anchor, must be "created_at" + - seconds, must be int between 3600 and 2592000 (1 hour to 30 days) DPOAlignmentConfig: type: object properties: @@ -13369,7 +13418,14 @@ components: type: object properties: query_generator_config: - $ref: '#/components/schemas/RAGQueryGeneratorConfig' + oneOf: + - $ref: '#/components/schemas/DefaultRAGQueryGeneratorConfig' + - $ref: '#/components/schemas/LLMRAGQueryGeneratorConfig' + discriminator: + propertyName: type + mapping: + default: '#/components/schemas/DefaultRAGQueryGeneratorConfig' + llm: '#/components/schemas/LLMRAGQueryGeneratorConfig' description: Configuration for the query generator. max_tokens_in_context: type: integer @@ -13412,15 +13468,6 @@ components: title: RAGQueryConfig description: >- Configuration for the RAG query generation. 
- RAGQueryGeneratorConfig: - oneOf: - - $ref: '#/components/schemas/DefaultRAGQueryGeneratorConfig' - - $ref: '#/components/schemas/LLMRAGQueryGeneratorConfig' - discriminator: - propertyName: type - mapping: - default: '#/components/schemas/DefaultRAGQueryGeneratorConfig' - llm: '#/components/schemas/LLMRAGQueryGeneratorConfig' RAGSearchMode: type: string enum: @@ -13856,6 +13903,15 @@ components: - dataset_id - scoring_functions title: RegisterBenchmarkRequest + DataSource: + oneOf: + - $ref: '#/components/schemas/URIDataSource' + - $ref: '#/components/schemas/RowsDataSource' + discriminator: + propertyName: type + mapping: + uri: '#/components/schemas/URIDataSource' + rows: '#/components/schemas/RowsDataSource' RegisterDatasetRequest: type: object properties: @@ -13940,6 +13996,31 @@ components: required: - model_id title: RegisterModelRequest + ParamType: + oneOf: + - $ref: '#/components/schemas/StringType' + - $ref: '#/components/schemas/NumberType' + - $ref: '#/components/schemas/BooleanType' + - $ref: '#/components/schemas/ArrayType' + - $ref: '#/components/schemas/ObjectType' + - $ref: '#/components/schemas/JsonType' + - $ref: '#/components/schemas/UnionType' + - $ref: '#/components/schemas/ChatCompletionInputType' + - $ref: '#/components/schemas/CompletionInputType' + - $ref: '#/components/schemas/AgentTurnInputType' + discriminator: + propertyName: type + mapping: + string: '#/components/schemas/StringType' + number: '#/components/schemas/NumberType' + boolean: '#/components/schemas/BooleanType' + array: '#/components/schemas/ArrayType' + object: '#/components/schemas/ObjectType' + json: '#/components/schemas/JsonType' + union: '#/components/schemas/UnionType' + chat_completion_input: '#/components/schemas/ChatCompletionInputType' + completion_input: '#/components/schemas/CompletionInputType' + agent_turn_input: '#/components/schemas/AgentTurnInputType' RegisterScoringFunctionRequest: type: object properties: diff --git a/llama_stack/apis/files/files.py b/llama_stack/apis/files/files.py index d5abb6286..e4cf6283a 100644 --- a/llama_stack/apis/files/files.py +++ b/llama_stack/apis/files/files.py @@ -111,9 +111,7 @@ class Files(Protocol): self, file: Annotated[UploadFile, File()], purpose: Annotated[OpenAIFilePurpose, Form()], - expires_after_anchor: Annotated[str | None, Form(alias="expires_after[anchor]")] = None, - expires_after_seconds: Annotated[int | None, Form(alias="expires_after[seconds]")] = None, - # TODO: expires_after is producing strange openapi spec, params are showing up as a required w/ oneOf being null + expires_after: Annotated[ExpiresAfter | None, Form()] = None, ) -> OpenAIFileObject: """ Upload a file that can be used across various endpoints. @@ -121,10 +119,11 @@ class Files(Protocol): The file upload should be a multipart form request with: - file: The File object (not file name) to be uploaded. - purpose: The intended purpose of the uploaded file. - - expires_after: Optional form values describing expiration for the file. Expected expires_after[anchor] = "created_at", expires_after[seconds] = {integer}. Seconds must be between 3600 and 2592000 (1 hour to 30 days). + - expires_after: Optional form values describing expiration for the file. :param file: The uploaded file object containing content and metadata (filename, content_type, etc.). :param purpose: The intended purpose of the uploaded file (e.g., "assistants", "fine-tune"). + :param expires_after: Optional form values describing expiration for the file. 
:returns: An OpenAIFileObject representing the uploaded file. """ ... diff --git a/llama_stack/providers/inline/files/localfs/files.py b/llama_stack/providers/inline/files/localfs/files.py index 65cf8d815..6e0c72de3 100644 --- a/llama_stack/providers/inline/files/localfs/files.py +++ b/llama_stack/providers/inline/files/localfs/files.py @@ -14,6 +14,7 @@ from fastapi import File, Form, Response, UploadFile from llama_stack.apis.common.errors import ResourceNotFoundError from llama_stack.apis.common.responses import Order from llama_stack.apis.files import ( + ExpiresAfter, Files, ListOpenAIFileResponse, OpenAIFileDeleteResponse, @@ -86,14 +87,13 @@ class LocalfsFilesImpl(Files): self, file: Annotated[UploadFile, File()], purpose: Annotated[OpenAIFilePurpose, Form()], - expires_after_anchor: Annotated[str | None, Form(alias="expires_after[anchor]")] = None, - expires_after_seconds: Annotated[int | None, Form(alias="expires_after[seconds]")] = None, + expires_after: Annotated[ExpiresAfter | None, Form()] = None, ) -> OpenAIFileObject: """Upload a file that can be used across various endpoints.""" if not self.sql_store: raise RuntimeError("Files provider not initialized") - if expires_after_anchor is not None or expires_after_seconds is not None: + if expires_after is not None: raise NotImplementedError("File expiration is not supported by this provider") file_id = self._generate_file_id() diff --git a/llama_stack/providers/remote/files/s3/files.py b/llama_stack/providers/remote/files/s3/files.py index 8ea96af9e..8520f70b6 100644 --- a/llama_stack/providers/remote/files/s3/files.py +++ b/llama_stack/providers/remote/files/s3/files.py @@ -195,8 +195,7 @@ class S3FilesImpl(Files): self, file: Annotated[UploadFile, File()], purpose: Annotated[OpenAIFilePurpose, Form()], - expires_after_anchor: Annotated[str | None, Form(alias="expires_after[anchor]")] = None, - expires_after_seconds: Annotated[int | None, Form(alias="expires_after[seconds]")] = None, + expires_after: Annotated[ExpiresAfter | None, Form()] = None, ) -> OpenAIFileObject: file_id = f"file-{uuid.uuid4().hex}" @@ -204,14 +203,6 @@ class S3FilesImpl(Files): created_at = self._now() - expires_after = None - if expires_after_anchor is not None or expires_after_seconds is not None: - # we use ExpiresAfter to validate input - expires_after = ExpiresAfter( - anchor=expires_after_anchor, # type: ignore[arg-type] - seconds=expires_after_seconds, # type: ignore[arg-type] - ) - # the default is no expiration. # to implement no expiration we set an expiration beyond the max. # we'll hide this fact from users when returning the file object. 
diff --git a/llama_stack/strong_typing/inspection.py b/llama_stack/strong_typing/inspection.py index a75a170cf..42713e371 100644 --- a/llama_stack/strong_typing/inspection.py +++ b/llama_stack/strong_typing/inspection.py @@ -567,6 +567,22 @@ def get_class_properties(typ: type) -> Iterable[Tuple[str, type | str]]: if is_dataclass_type(typ): return ((field.name, field.type) for field in dataclasses.fields(typ)) + elif hasattr(typ, "model_fields"): + # Pydantic BaseModel - use model_fields to exclude ClassVar and other non-field attributes + # Reconstruct Annotated type if discriminator exists to preserve metadata + from typing import Annotated, Any, cast + from pydantic.fields import FieldInfo + + def get_field_type(name: str, field: Any) -> type | str: + # If field has discriminator, wrap in Annotated to preserve it for schema generation + if field.discriminator: + field_info = FieldInfo(annotation=None, discriminator=field.discriminator) + # Annotated returns _AnnotatedAlias which isn't a type but is valid here + return Annotated[field.annotation, field_info] # type: ignore[return-value] + # field.annotation can be Union types, Annotated, etc. which aren't type but are valid + return field.annotation # type: ignore[return-value,no-any-return] + + return ((name, get_field_type(name, field)) for name, field in typ.model_fields.items()) else: resolved_hints = get_resolved_hints(typ) return resolved_hints.items() diff --git a/llama_stack/strong_typing/schema.py b/llama_stack/strong_typing/schema.py index 82baddc86..2bfb7033e 100644 --- a/llama_stack/strong_typing/schema.py +++ b/llama_stack/strong_typing/schema.py @@ -92,7 +92,12 @@ def get_class_property_docstrings( :returns: A dictionary mapping property names to descriptions. """ - result = {} + result: Dict[str, str] = {} + # Only try to get MRO if data_type is actually a class + # Special types like Literal, Union, etc. 
don't have MRO + if not inspect.isclass(data_type): + return result + for base in inspect.getmro(data_type): docstr = docstring.parse_type(base) for param in docstr.params.values(): diff --git a/tests/unit/providers/files/test_s3_files.py b/tests/unit/providers/files/test_s3_files.py index c665bf124..92a45a9f2 100644 --- a/tests/unit/providers/files/test_s3_files.py +++ b/tests/unit/providers/files/test_s3_files.py @@ -228,12 +228,13 @@ class TestS3FilesImpl: mock_now.return_value = 0 + from llama_stack.apis.files import ExpiresAfter + sample_text_file.filename = "test_expired_file" uploaded = await s3_provider.openai_upload_file( file=sample_text_file, purpose=OpenAIFilePurpose.ASSISTANTS, - expires_after_anchor="created_at", - expires_after_seconds=two_hours, + expires_after=ExpiresAfter(anchor="created_at", seconds=two_hours), ) mock_now.return_value = two_hours * 2 # fast forward 4 hours @@ -259,42 +260,44 @@ class TestS3FilesImpl: async def test_unsupported_expires_after_anchor(self, s3_provider, sample_text_file): """Unsupported anchor value should raise ValueError.""" + from llama_stack.apis.files import ExpiresAfter + sample_text_file.filename = "test_unsupported_expires_after_anchor" with pytest.raises(ValueError, match="Input should be 'created_at'"): await s3_provider.openai_upload_file( file=sample_text_file, purpose=OpenAIFilePurpose.ASSISTANTS, - expires_after_anchor="now", - expires_after_seconds=3600, + expires_after=ExpiresAfter(anchor="now", seconds=3600), # type: ignore ) async def test_nonint_expires_after_seconds(self, s3_provider, sample_text_file): """Non-integer seconds in expires_after should raise ValueError.""" + from llama_stack.apis.files import ExpiresAfter + sample_text_file.filename = "test_nonint_expires_after_seconds" with pytest.raises(ValueError, match="should be a valid integer"): await s3_provider.openai_upload_file( file=sample_text_file, purpose=OpenAIFilePurpose.ASSISTANTS, - expires_after_anchor="created_at", - expires_after_seconds="many", + expires_after=ExpiresAfter(anchor="created_at", seconds="many"), # type: ignore ) async def test_expires_after_seconds_out_of_bounds(self, s3_provider, sample_text_file): """Seconds outside allowed range should raise ValueError.""" + from llama_stack.apis.files import ExpiresAfter + with pytest.raises(ValueError, match="greater than or equal to 3600"): await s3_provider.openai_upload_file( file=sample_text_file, purpose=OpenAIFilePurpose.ASSISTANTS, - expires_after_anchor="created_at", - expires_after_seconds=3599, + expires_after=ExpiresAfter(anchor="created_at", seconds=3599), ) with pytest.raises(ValueError, match="less than or equal to 2592000"): await s3_provider.openai_upload_file( file=sample_text_file, purpose=OpenAIFilePurpose.ASSISTANTS, - expires_after_anchor="created_at", - expires_after_seconds=2592001, + expires_after=ExpiresAfter(anchor="created_at", seconds=2592001), ) From 56b625d18af5c53446facd4e4020b3195df7b081 Mon Sep 17 00:00:00 2001 From: Ashwin Bharambe Date: Mon, 29 Sep 2025 22:57:37 -0700 Subject: [PATCH 11/13] feat(openai_movement)!: Change URL structures to kill /openai/v1 (part 2) (#3605) --- docs/docs/providers/openai.mdx | 6 +- docs/static/llama-stack-spec.html | 1593 ----------------------- docs/static/llama-stack-spec.yaml | 1188 ----------------- llama_stack/apis/agents/agents.py | 5 - llama_stack/apis/batches/batches.py | 4 - llama_stack/apis/files/files.py | 5 - llama_stack/apis/inference/inference.py | 5 - llama_stack/apis/models/models.py | 8 - 
llama_stack/apis/safety/safety.py | 1 - llama_stack/apis/vector_io/vector_io.py | 22 - 10 files changed, 3 insertions(+), 2834 deletions(-) diff --git a/docs/docs/providers/openai.mdx b/docs/docs/providers/openai.mdx index bcff5873c..3ae8004e5 100644 --- a/docs/docs/providers/openai.mdx +++ b/docs/docs/providers/openai.mdx @@ -7,7 +7,7 @@ sidebar_position: 1 ### Server path -Llama Stack exposes an OpenAI-compatible API endpoint at `/v1/openai/v1`. So, for a Llama Stack server running locally on port `8321`, the full url to the OpenAI-compatible API endpoint is `http://localhost:8321/v1/openai/v1`. +Llama Stack exposes OpenAI-compatible API endpoints at `/v1`. So, for a Llama Stack server running locally on port `8321`, the full url to the OpenAI-compatible API endpoint is `http://localhost:8321/v1`. ### Clients @@ -25,12 +25,12 @@ client = LlamaStackClient(base_url="http://localhost:8321") #### OpenAI Client -When using an OpenAI client, set the `base_url` to the `/v1/openai/v1` path on your Llama Stack server. +When using an OpenAI client, set the `base_url` to the `/v1` path on your Llama Stack server. ```python from openai import OpenAI -client = OpenAI(base_url="http://localhost:8321/v1/openai/v1", api_key="none") +client = OpenAI(base_url="http://localhost:8321/v1", api_key="none") ``` Regardless of the client you choose, the following code examples should all work the same. diff --git a/docs/static/llama-stack-spec.html b/docs/static/llama-stack-spec.html index 616ebb4fc..01b316069 100644 --- a/docs/static/llama-stack-spec.html +++ b/docs/static/llama-stack-spec.html @@ -545,124 +545,6 @@ } } }, - "/v1/openai/v1/responses": { - "get": { - "responses": { - "200": { - "description": "A ListOpenAIResponseObject.", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/ListOpenAIResponseObject" - } - } - } - }, - "400": { - "$ref": "#/components/responses/BadRequest400" - }, - "429": { - "$ref": "#/components/responses/TooManyRequests429" - }, - "500": { - "$ref": "#/components/responses/InternalServerError500" - }, - "default": { - "$ref": "#/components/responses/DefaultError" - } - }, - "tags": [ - "Agents" - ], - "summary": "List all OpenAI responses.", - "description": "List all OpenAI responses.", - "parameters": [ - { - "name": "after", - "in": "query", - "description": "The ID of the last response to return.", - "required": false, - "schema": { - "type": "string" - } - }, - { - "name": "limit", - "in": "query", - "description": "The number of responses to return.", - "required": false, - "schema": { - "type": "integer" - } - }, - { - "name": "model", - "in": "query", - "description": "The model to filter responses by.", - "required": false, - "schema": { - "type": "string" - } - }, - { - "name": "order", - "in": "query", - "description": "The order to sort responses by when sorted by created_at ('asc' or 'desc').", - "required": false, - "schema": { - "$ref": "#/components/schemas/Order" - } - } - ] - }, - "post": { - "responses": { - "200": { - "description": "An OpenAIResponseObject.", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/OpenAIResponseObject" - } - }, - "text/event-stream": { - "schema": { - "$ref": "#/components/schemas/OpenAIResponseObjectStream" - } - } - } - }, - "400": { - "$ref": "#/components/responses/BadRequest400" - }, - "429": { - "$ref": "#/components/responses/TooManyRequests429" - }, - "500": { - "$ref": "#/components/responses/InternalServerError500" - }, - "default": { - "$ref": 
"#/components/responses/DefaultError" - } - }, - "tags": [ - "Agents" - ], - "summary": "Create a new OpenAI response.", - "description": "Create a new OpenAI response.", - "parameters": [], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/CreateOpenaiResponseRequest" - } - } - }, - "required": true - } - } - }, "/v1/prompts": { "get": { "responses": { @@ -1013,92 +895,6 @@ ] } }, - "/v1/openai/v1/responses/{response_id}": { - "get": { - "responses": { - "200": { - "description": "An OpenAIResponseObject.", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/OpenAIResponseObject" - } - } - } - }, - "400": { - "$ref": "#/components/responses/BadRequest400" - }, - "429": { - "$ref": "#/components/responses/TooManyRequests429" - }, - "500": { - "$ref": "#/components/responses/InternalServerError500" - }, - "default": { - "$ref": "#/components/responses/DefaultError" - } - }, - "tags": [ - "Agents" - ], - "summary": "Retrieve an OpenAI response by its ID.", - "description": "Retrieve an OpenAI response by its ID.", - "parameters": [ - { - "name": "response_id", - "in": "path", - "description": "The ID of the OpenAI response to retrieve.", - "required": true, - "schema": { - "type": "string" - } - } - ] - }, - "delete": { - "responses": { - "200": { - "description": "An OpenAIDeleteResponseObject", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/OpenAIDeleteResponseObject" - } - } - } - }, - "400": { - "$ref": "#/components/responses/BadRequest400" - }, - "429": { - "$ref": "#/components/responses/TooManyRequests429" - }, - "500": { - "$ref": "#/components/responses/InternalServerError500" - }, - "default": { - "$ref": "#/components/responses/DefaultError" - } - }, - "tags": [ - "Agents" - ], - "summary": "Delete an OpenAI response by its ID.", - "description": "Delete an OpenAI response by its ID.", - "parameters": [ - { - "name": "response_id", - "in": "path", - "description": "The ID of the OpenAI response to delete.", - "required": true, - "schema": { - "type": "string" - } - } - ] - } - }, "/v1/prompts/{prompt_id}": { "get": { "responses": { @@ -1682,50 +1478,6 @@ ] } }, - "/v1/openai/v1/chat/completions/{completion_id}": { - "get": { - "responses": { - "200": { - "description": "A OpenAICompletionWithInputMessages.", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/OpenAICompletionWithInputMessages" - } - } - } - }, - "400": { - "$ref": "#/components/responses/BadRequest400" - }, - "429": { - "$ref": "#/components/responses/TooManyRequests429" - }, - "500": { - "$ref": "#/components/responses/InternalServerError500" - }, - "default": { - "$ref": "#/components/responses/DefaultError" - } - }, - "tags": [ - "Inference" - ], - "summary": "Describe a chat completion by its ID.", - "description": "Describe a chat completion by its ID.", - "parameters": [ - { - "name": "completion_id", - "in": "path", - "description": "ID of the chat completion.", - "required": true, - "schema": { - "type": "string" - } - } - ] - } - }, "/v1/datasets/{dataset_id}": { "get": { "responses": { @@ -3517,126 +3269,6 @@ } } }, - "/v1/openai/v1/chat/completions": { - "get": { - "responses": { - "200": { - "description": "A ListOpenAIChatCompletionResponse.", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/ListOpenAIChatCompletionResponse" - } - } - } - }, - "400": { - "$ref": "#/components/responses/BadRequest400" - }, - 
"429": { - "$ref": "#/components/responses/TooManyRequests429" - }, - "500": { - "$ref": "#/components/responses/InternalServerError500" - }, - "default": { - "$ref": "#/components/responses/DefaultError" - } - }, - "tags": [ - "Inference" - ], - "summary": "List all chat completions.", - "description": "List all chat completions.", - "parameters": [ - { - "name": "after", - "in": "query", - "description": "The ID of the last chat completion to return.", - "required": false, - "schema": { - "type": "string" - } - }, - { - "name": "limit", - "in": "query", - "description": "The maximum number of chat completions to return.", - "required": false, - "schema": { - "type": "integer" - } - }, - { - "name": "model", - "in": "query", - "description": "The model to filter by.", - "required": false, - "schema": { - "type": "string" - } - }, - { - "name": "order", - "in": "query", - "description": "The order to sort the chat completions by: \"asc\" or \"desc\". Defaults to \"desc\".", - "required": false, - "schema": { - "$ref": "#/components/schemas/Order" - } - } - ] - }, - "post": { - "responses": { - "200": { - "description": "An OpenAIChatCompletion.", - "content": { - "application/json": { - "schema": { - "oneOf": [ - { - "$ref": "#/components/schemas/OpenAIChatCompletion" - }, - { - "$ref": "#/components/schemas/OpenAIChatCompletionChunk" - } - ] - } - } - } - }, - "400": { - "$ref": "#/components/responses/BadRequest400" - }, - "429": { - "$ref": "#/components/responses/TooManyRequests429" - }, - "500": { - "$ref": "#/components/responses/InternalServerError500" - }, - "default": { - "$ref": "#/components/responses/DefaultError" - } - }, - "tags": [ - "Inference" - ], - "summary": "Generate an OpenAI-compatible chat completion for the given messages using the specified model.", - "description": "Generate an OpenAI-compatible chat completion for the given messages using the specified model.", - "parameters": [], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/OpenaiChatCompletionRequest" - } - } - }, - "required": true - } - } - }, "/v1/datasets": { "get": { "responses": { @@ -3881,98 +3513,6 @@ ] } }, - "/v1/openai/v1/responses/{response_id}/input_items": { - "get": { - "responses": { - "200": { - "description": "An ListOpenAIResponseInputItem.", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/ListOpenAIResponseInputItem" - } - } - } - }, - "400": { - "$ref": "#/components/responses/BadRequest400" - }, - "429": { - "$ref": "#/components/responses/TooManyRequests429" - }, - "500": { - "$ref": "#/components/responses/InternalServerError500" - }, - "default": { - "$ref": "#/components/responses/DefaultError" - } - }, - "tags": [ - "Agents" - ], - "summary": "List input items for a given OpenAI response.", - "description": "List input items for a given OpenAI response.", - "parameters": [ - { - "name": "response_id", - "in": "path", - "description": "The ID of the response to retrieve input items for.", - "required": true, - "schema": { - "type": "string" - } - }, - { - "name": "after", - "in": "query", - "description": "An item ID to list items after, used for pagination.", - "required": false, - "schema": { - "type": "string" - } - }, - { - "name": "before", - "in": "query", - "description": "An item ID to list items before, used for pagination.", - "required": false, - "schema": { - "type": "string" - } - }, - { - "name": "include", - "in": "query", - "description": "Additional fields to include in the 
response.", - "required": false, - "schema": { - "type": "array", - "items": { - "type": "string" - } - } - }, - { - "name": "limit", - "in": "query", - "description": "A limit on the number of objects to be returned. Limit can range between 1 and 100, and the default is 20.", - "required": false, - "schema": { - "type": "integer" - } - }, - { - "name": "order", - "in": "query", - "description": "The order to return the input items in. Default is desc.", - "required": false, - "schema": { - "$ref": "#/components/schemas/Order" - } - } - ] - } - }, "/v1/prompts/{prompt_id}/versions": { "get": { "responses": { @@ -4650,147 +4190,6 @@ } } }, - "/v1/openai/v1/vector_stores/{vector_store_id}/files": { - "get": { - "responses": { - "200": { - "description": "A VectorStoreListFilesResponse containing the list of files.", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/VectorStoreListFilesResponse" - } - } - } - }, - "400": { - "$ref": "#/components/responses/BadRequest400" - }, - "429": { - "$ref": "#/components/responses/TooManyRequests429" - }, - "500": { - "$ref": "#/components/responses/InternalServerError500" - }, - "default": { - "$ref": "#/components/responses/DefaultError" - } - }, - "tags": [ - "VectorIO" - ], - "summary": "List files in a vector store.", - "description": "List files in a vector store.", - "parameters": [ - { - "name": "vector_store_id", - "in": "path", - "description": "The ID of the vector store to list files from.", - "required": true, - "schema": { - "type": "string" - } - }, - { - "name": "limit", - "in": "query", - "description": "(Optional) A limit on the number of objects to be returned. Limit can range between 1 and 100, and the default is 20.", - "required": false, - "schema": { - "type": "integer" - } - }, - { - "name": "order", - "in": "query", - "description": "(Optional) Sort order by the `created_at` timestamp of the objects. `asc` for ascending order and `desc` for descending order.", - "required": false, - "schema": { - "type": "string" - } - }, - { - "name": "after", - "in": "query", - "description": "(Optional) A cursor for use in pagination. `after` is an object ID that defines your place in the list.", - "required": false, - "schema": { - "type": "string" - } - }, - { - "name": "before", - "in": "query", - "description": "(Optional) A cursor for use in pagination. 
`before` is an object ID that defines your place in the list.", - "required": false, - "schema": { - "type": "string" - } - }, - { - "name": "filter", - "in": "query", - "description": "(Optional) Filter by file status to only return files with the specified status.", - "required": false, - "schema": { - "$ref": "#/components/schemas/VectorStoreFileStatus" - } - } - ] - }, - "post": { - "responses": { - "200": { - "description": "A VectorStoreFileObject representing the attached file.", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/VectorStoreFileObject" - } - } - } - }, - "400": { - "$ref": "#/components/responses/BadRequest400" - }, - "429": { - "$ref": "#/components/responses/TooManyRequests429" - }, - "500": { - "$ref": "#/components/responses/InternalServerError500" - }, - "default": { - "$ref": "#/components/responses/DefaultError" - } - }, - "tags": [ - "VectorIO" - ], - "summary": "Attach a file to a vector store.", - "description": "Attach a file to a vector store.", - "parameters": [ - { - "name": "vector_store_id", - "in": "path", - "description": "The ID of the vector store to attach the file to.", - "required": true, - "schema": { - "type": "string" - } - } - ], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/OpenaiAttachFileToVectorStoreRequest" - } - } - }, - "required": true - } - } - }, "/v1/completions": { "post": { "responses": { @@ -4835,50 +4234,6 @@ } } }, - "/v1/openai/v1/completions": { - "post": { - "responses": { - "200": { - "description": "An OpenAICompletion.", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/OpenAICompletion" - } - } - } - }, - "400": { - "$ref": "#/components/responses/BadRequest400" - }, - "429": { - "$ref": "#/components/responses/TooManyRequests429" - }, - "500": { - "$ref": "#/components/responses/InternalServerError500" - }, - "default": { - "$ref": "#/components/responses/DefaultError" - } - }, - "tags": [ - "Inference" - ], - "summary": "Generate an OpenAI-compatible completion for the given prompt using the specified model.", - "description": "Generate an OpenAI-compatible completion for the given prompt using the specified model.", - "parameters": [], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/OpenaiCompletionRequest" - } - } - }, - "required": true - } - } - }, "/v1/vector_stores": { "get": { "responses": { @@ -4992,119 +4347,6 @@ } } }, - "/v1/openai/v1/vector_stores": { - "get": { - "responses": { - "200": { - "description": "A VectorStoreListResponse containing the list of vector stores.", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/VectorStoreListResponse" - } - } - } - }, - "400": { - "$ref": "#/components/responses/BadRequest400" - }, - "429": { - "$ref": "#/components/responses/TooManyRequests429" - }, - "500": { - "$ref": "#/components/responses/InternalServerError500" - }, - "default": { - "$ref": "#/components/responses/DefaultError" - } - }, - "tags": [ - "VectorIO" - ], - "summary": "Returns a list of vector stores.", - "description": "Returns a list of vector stores.", - "parameters": [ - { - "name": "limit", - "in": "query", - "description": "A limit on the number of objects to be returned. 
Limit can range between 1 and 100, and the default is 20.", - "required": false, - "schema": { - "type": "integer" - } - }, - { - "name": "order", - "in": "query", - "description": "Sort order by the `created_at` timestamp of the objects. `asc` for ascending order and `desc` for descending order.", - "required": false, - "schema": { - "type": "string" - } - }, - { - "name": "after", - "in": "query", - "description": "A cursor for use in pagination. `after` is an object ID that defines your place in the list.", - "required": false, - "schema": { - "type": "string" - } - }, - { - "name": "before", - "in": "query", - "description": "A cursor for use in pagination. `before` is an object ID that defines your place in the list.", - "required": false, - "schema": { - "type": "string" - } - } - ] - }, - "post": { - "responses": { - "200": { - "description": "A VectorStoreObject representing the created vector store.", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/VectorStoreObject" - } - } - } - }, - "400": { - "$ref": "#/components/responses/BadRequest400" - }, - "429": { - "$ref": "#/components/responses/TooManyRequests429" - }, - "500": { - "$ref": "#/components/responses/InternalServerError500" - }, - "default": { - "$ref": "#/components/responses/DefaultError" - } - }, - "tags": [ - "VectorIO" - ], - "summary": "Creates a vector store.", - "description": "Creates a vector store.", - "parameters": [], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/OpenaiCreateVectorStoreRequest" - } - } - }, - "required": true - } - } - }, "/v1/files/{file_id}": { "get": { "responses": { @@ -5191,92 +4433,6 @@ ] } }, - "/v1/openai/v1/files/{file_id}": { - "get": { - "responses": { - "200": { - "description": "An OpenAIFileObject containing file information.", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/OpenAIFileObject" - } - } - } - }, - "400": { - "$ref": "#/components/responses/BadRequest400" - }, - "429": { - "$ref": "#/components/responses/TooManyRequests429" - }, - "500": { - "$ref": "#/components/responses/InternalServerError500" - }, - "default": { - "$ref": "#/components/responses/DefaultError" - } - }, - "tags": [ - "Files" - ], - "summary": "Returns information about a specific file.", - "description": "Returns information about a specific file.", - "parameters": [ - { - "name": "file_id", - "in": "path", - "description": "The ID of the file to use for this request.", - "required": true, - "schema": { - "type": "string" - } - } - ] - }, - "delete": { - "responses": { - "200": { - "description": "An OpenAIFileDeleteResponse indicating successful deletion.", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/OpenAIFileDeleteResponse" - } - } - } - }, - "400": { - "$ref": "#/components/responses/BadRequest400" - }, - "429": { - "$ref": "#/components/responses/TooManyRequests429" - }, - "500": { - "$ref": "#/components/responses/InternalServerError500" - }, - "default": { - "$ref": "#/components/responses/DefaultError" - } - }, - "tags": [ - "Files" - ], - "summary": "Delete a file.", - "description": "Delete a file.", - "parameters": [ - { - "name": "file_id", - "in": "path", - "description": "The ID of the file to use for this request.", - "required": true, - "schema": { - "type": "string" - } - } - ] - } - }, "/v1/vector_stores/{vector_store_id}": { "get": { "responses": { @@ -5415,144 +4571,6 @@ ] } }, - 
"/v1/openai/v1/vector_stores/{vector_store_id}": { - "get": { - "responses": { - "200": { - "description": "A VectorStoreObject representing the vector store.", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/VectorStoreObject" - } - } - } - }, - "400": { - "$ref": "#/components/responses/BadRequest400" - }, - "429": { - "$ref": "#/components/responses/TooManyRequests429" - }, - "500": { - "$ref": "#/components/responses/InternalServerError500" - }, - "default": { - "$ref": "#/components/responses/DefaultError" - } - }, - "tags": [ - "VectorIO" - ], - "summary": "Retrieves a vector store.", - "description": "Retrieves a vector store.", - "parameters": [ - { - "name": "vector_store_id", - "in": "path", - "description": "The ID of the vector store to retrieve.", - "required": true, - "schema": { - "type": "string" - } - } - ] - }, - "post": { - "responses": { - "200": { - "description": "A VectorStoreObject representing the updated vector store.", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/VectorStoreObject" - } - } - } - }, - "400": { - "$ref": "#/components/responses/BadRequest400" - }, - "429": { - "$ref": "#/components/responses/TooManyRequests429" - }, - "500": { - "$ref": "#/components/responses/InternalServerError500" - }, - "default": { - "$ref": "#/components/responses/DefaultError" - } - }, - "tags": [ - "VectorIO" - ], - "summary": "Updates a vector store.", - "description": "Updates a vector store.", - "parameters": [ - { - "name": "vector_store_id", - "in": "path", - "description": "The ID of the vector store to update.", - "required": true, - "schema": { - "type": "string" - } - } - ], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/OpenaiUpdateVectorStoreRequest" - } - } - }, - "required": true - } - }, - "delete": { - "responses": { - "200": { - "description": "A VectorStoreDeleteResponse indicating the deletion status.", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/VectorStoreDeleteResponse" - } - } - } - }, - "400": { - "$ref": "#/components/responses/BadRequest400" - }, - "429": { - "$ref": "#/components/responses/TooManyRequests429" - }, - "500": { - "$ref": "#/components/responses/InternalServerError500" - }, - "default": { - "$ref": "#/components/responses/DefaultError" - } - }, - "tags": [ - "VectorIO" - ], - "summary": "Delete a vector store.", - "description": "Delete a vector store.", - "parameters": [ - { - "name": "vector_store_id", - "in": "path", - "description": "The ID of the vector store to delete.", - "required": true, - "schema": { - "type": "string" - } - } - ] - } - }, "/v1/vector_stores/{vector_store_id}/files/{file_id}": { "get": { "responses": { @@ -5718,171 +4736,6 @@ ] } }, - "/v1/openai/v1/vector_stores/{vector_store_id}/files/{file_id}": { - "get": { - "responses": { - "200": { - "description": "A VectorStoreFileObject representing the file.", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/VectorStoreFileObject" - } - } - } - }, - "400": { - "$ref": "#/components/responses/BadRequest400" - }, - "429": { - "$ref": "#/components/responses/TooManyRequests429" - }, - "500": { - "$ref": "#/components/responses/InternalServerError500" - }, - "default": { - "$ref": "#/components/responses/DefaultError" - } - }, - "tags": [ - "VectorIO" - ], - "summary": "Retrieves a vector store file.", - "description": "Retrieves a vector store file.", - 
"parameters": [ - { - "name": "vector_store_id", - "in": "path", - "description": "The ID of the vector store containing the file to retrieve.", - "required": true, - "schema": { - "type": "string" - } - }, - { - "name": "file_id", - "in": "path", - "description": "The ID of the file to retrieve.", - "required": true, - "schema": { - "type": "string" - } - } - ] - }, - "post": { - "responses": { - "200": { - "description": "A VectorStoreFileObject representing the updated file.", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/VectorStoreFileObject" - } - } - } - }, - "400": { - "$ref": "#/components/responses/BadRequest400" - }, - "429": { - "$ref": "#/components/responses/TooManyRequests429" - }, - "500": { - "$ref": "#/components/responses/InternalServerError500" - }, - "default": { - "$ref": "#/components/responses/DefaultError" - } - }, - "tags": [ - "VectorIO" - ], - "summary": "Updates a vector store file.", - "description": "Updates a vector store file.", - "parameters": [ - { - "name": "vector_store_id", - "in": "path", - "description": "The ID of the vector store containing the file to update.", - "required": true, - "schema": { - "type": "string" - } - }, - { - "name": "file_id", - "in": "path", - "description": "The ID of the file to update.", - "required": true, - "schema": { - "type": "string" - } - } - ], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/OpenaiUpdateVectorStoreFileRequest" - } - } - }, - "required": true - } - }, - "delete": { - "responses": { - "200": { - "description": "A VectorStoreFileDeleteResponse indicating the deletion status.", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/VectorStoreFileDeleteResponse" - } - } - } - }, - "400": { - "$ref": "#/components/responses/BadRequest400" - }, - "429": { - "$ref": "#/components/responses/TooManyRequests429" - }, - "500": { - "$ref": "#/components/responses/InternalServerError500" - }, - "default": { - "$ref": "#/components/responses/DefaultError" - } - }, - "tags": [ - "VectorIO" - ], - "summary": "Delete a vector store file.", - "description": "Delete a vector store file.", - "parameters": [ - { - "name": "vector_store_id", - "in": "path", - "description": "The ID of the vector store containing the file to delete.", - "required": true, - "schema": { - "type": "string" - } - }, - { - "name": "file_id", - "in": "path", - "description": "The ID of the file to delete.", - "required": true, - "schema": { - "type": "string" - } - } - ] - } - }, "/v1/embeddings": { "post": { "responses": { @@ -5927,50 +4780,6 @@ } } }, - "/v1/openai/v1/embeddings": { - "post": { - "responses": { - "200": { - "description": "An OpenAIEmbeddingsResponse containing the embeddings.", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/OpenAIEmbeddingsResponse" - } - } - } - }, - "400": { - "$ref": "#/components/responses/BadRequest400" - }, - "429": { - "$ref": "#/components/responses/TooManyRequests429" - }, - "500": { - "$ref": "#/components/responses/InternalServerError500" - }, - "default": { - "$ref": "#/components/responses/DefaultError" - } - }, - "tags": [ - "Inference" - ], - "summary": "Generate OpenAI-compatible embeddings for the given input using the specified model.", - "description": "Generate OpenAI-compatible embeddings for the given input using the specified model.", - "parameters": [], - "requestBody": { - "content": { - "application/json": { - "schema": { 
- "$ref": "#/components/schemas/OpenaiEmbeddingsRequest" - } - } - }, - "required": true - } - } - }, "/v1/files": { "get": { "responses": { @@ -6100,169 +4909,6 @@ } } }, - "/v1/openai/v1/files": { - "get": { - "responses": { - "200": { - "description": "An ListOpenAIFileResponse containing the list of files.", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/ListOpenAIFileResponse" - } - } - } - }, - "400": { - "$ref": "#/components/responses/BadRequest400" - }, - "429": { - "$ref": "#/components/responses/TooManyRequests429" - }, - "500": { - "$ref": "#/components/responses/InternalServerError500" - }, - "default": { - "$ref": "#/components/responses/DefaultError" - } - }, - "tags": [ - "Files" - ], - "summary": "Returns a list of files that belong to the user's organization.", - "description": "Returns a list of files that belong to the user's organization.", - "parameters": [ - { - "name": "after", - "in": "query", - "description": "A cursor for use in pagination. `after` is an object ID that defines your place in the list. For instance, if you make a list request and receive 100 objects, ending with obj_foo, your subsequent call can include after=obj_foo in order to fetch the next page of the list.", - "required": false, - "schema": { - "type": "string" - } - }, - { - "name": "limit", - "in": "query", - "description": "A limit on the number of objects to be returned. Limit can range between 1 and 10,000, and the default is 10,000.", - "required": false, - "schema": { - "type": "integer" - } - }, - { - "name": "order", - "in": "query", - "description": "Sort order by the `created_at` timestamp of the objects. `asc` for ascending order and `desc` for descending order.", - "required": false, - "schema": { - "$ref": "#/components/schemas/Order" - } - }, - { - "name": "purpose", - "in": "query", - "description": "Only return files with the given purpose.", - "required": false, - "schema": { - "$ref": "#/components/schemas/OpenAIFilePurpose" - } - } - ] - }, - "post": { - "responses": { - "200": { - "description": "An OpenAIFileObject representing the uploaded file.", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/OpenAIFileObject" - } - } - } - }, - "400": { - "$ref": "#/components/responses/BadRequest400" - }, - "429": { - "$ref": "#/components/responses/TooManyRequests429" - }, - "500": { - "$ref": "#/components/responses/InternalServerError500" - }, - "default": { - "$ref": "#/components/responses/DefaultError" - } - }, - "tags": [ - "Files" - ], - "summary": "Upload a file that can be used across various endpoints.", - "description": "Upload a file that can be used across various endpoints.\nThe file upload should be a multipart form request with:\n- file: The File object (not file name) to be uploaded.\n- purpose: The intended purpose of the uploaded file.\n- expires_after: Optional form values describing expiration for the file.", - "parameters": [], - "requestBody": { - "content": { - "multipart/form-data": { - "schema": { - "type": "object", - "properties": { - "file": { - "type": "string", - "format": "binary" - }, - "purpose": { - "$ref": "#/components/schemas/OpenAIFilePurpose" - }, - "expires_after": { - "$ref": "#/components/schemas/ExpiresAfter" - } - }, - "required": [ - "file", - "purpose" - ] - } - } - }, - "required": true - } - } - }, - "/v1/openai/v1/models": { - "get": { - "responses": { - "200": { - "description": "A OpenAIListModelsResponse.", - "content": { - "application/json": { - "schema": 
{ - "$ref": "#/components/schemas/OpenAIListModelsResponse" - } - } - } - }, - "400": { - "$ref": "#/components/responses/BadRequest400" - }, - "429": { - "$ref": "#/components/responses/TooManyRequests429" - }, - "500": { - "$ref": "#/components/responses/InternalServerError500" - }, - "default": { - "$ref": "#/components/responses/DefaultError" - } - }, - "tags": [ - "Models" - ], - "summary": "List models using the OpenAI API.", - "description": "List models using the OpenAI API.", - "parameters": [] - } - }, "/v1/files/{file_id}/content": { "get": { "responses": { @@ -6307,50 +4953,6 @@ ] } }, - "/v1/openai/v1/files/{file_id}/content": { - "get": { - "responses": { - "200": { - "description": "The raw file content as a binary response.", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/Response" - } - } - } - }, - "400": { - "$ref": "#/components/responses/BadRequest400" - }, - "429": { - "$ref": "#/components/responses/TooManyRequests429" - }, - "500": { - "$ref": "#/components/responses/InternalServerError500" - }, - "default": { - "$ref": "#/components/responses/DefaultError" - } - }, - "tags": [ - "Files" - ], - "summary": "Returns the contents of the specified file.", - "description": "Returns the contents of the specified file.", - "parameters": [ - { - "name": "file_id", - "in": "path", - "description": "The ID of the file to use for this request.", - "required": true, - "schema": { - "type": "string" - } - } - ] - } - }, "/v1/vector_stores/{vector_store_id}/files/{file_id}/content": { "get": { "responses": { @@ -6404,59 +5006,6 @@ ] } }, - "/v1/openai/v1/vector_stores/{vector_store_id}/files/{file_id}/content": { - "get": { - "responses": { - "200": { - "description": "A list of InterleavedContent representing the file contents.", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/VectorStoreFileContentsResponse" - } - } - } - }, - "400": { - "$ref": "#/components/responses/BadRequest400" - }, - "429": { - "$ref": "#/components/responses/TooManyRequests429" - }, - "500": { - "$ref": "#/components/responses/InternalServerError500" - }, - "default": { - "$ref": "#/components/responses/DefaultError" - } - }, - "tags": [ - "VectorIO" - ], - "summary": "Retrieves the contents of a vector store file.", - "description": "Retrieves the contents of a vector store file.", - "parameters": [ - { - "name": "vector_store_id", - "in": "path", - "description": "The ID of the vector store containing the file to retrieve.", - "required": true, - "schema": { - "type": "string" - } - }, - { - "name": "file_id", - "in": "path", - "description": "The ID of the file to retrieve.", - "required": true, - "schema": { - "type": "string" - } - } - ] - } - }, "/v1/vector_stores/{vector_store_id}/search": { "post": { "responses": { @@ -6511,60 +5060,6 @@ } } }, - "/v1/openai/v1/vector_stores/{vector_store_id}/search": { - "post": { - "responses": { - "200": { - "description": "A VectorStoreSearchResponse containing the search results.", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/VectorStoreSearchResponsePage" - } - } - } - }, - "400": { - "$ref": "#/components/responses/BadRequest400" - }, - "429": { - "$ref": "#/components/responses/TooManyRequests429" - }, - "500": { - "$ref": "#/components/responses/InternalServerError500" - }, - "default": { - "$ref": "#/components/responses/DefaultError" - } - }, - "tags": [ - "VectorIO" - ], - "summary": "Search for chunks in a vector store.", - 
"description": "Search for chunks in a vector store.\nSearches a vector store for relevant chunks based on a query and optional file attribute filters.", - "parameters": [ - { - "name": "vector_store_id", - "in": "path", - "description": "The ID of the vector store to search.", - "required": true, - "schema": { - "type": "string" - } - } - ], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/OpenaiSearchVectorStoreRequest" - } - } - }, - "required": true - } - } - }, "/v1alpha/post-training/preference-optimize": { "post": { "responses": { @@ -7156,50 +5651,6 @@ } } }, - "/v1/openai/v1/moderations": { - "post": { - "responses": { - "200": { - "description": "A moderation object.", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/ModerationObject" - } - } - } - }, - "400": { - "$ref": "#/components/responses/BadRequest400" - }, - "429": { - "$ref": "#/components/responses/TooManyRequests429" - }, - "500": { - "$ref": "#/components/responses/InternalServerError500" - }, - "default": { - "$ref": "#/components/responses/DefaultError" - } - }, - "tags": [ - "Safety" - ], - "summary": "Classifies if text and/or image inputs are potentially harmful.", - "description": "Classifies if text and/or image inputs are potentially harmful.", - "parameters": [], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/RunModerationRequest" - } - } - }, - "required": true - } - } - }, "/v1/safety/run-shield": { "post": { "responses": { @@ -17319,50 +15770,6 @@ "title": "VectorStoreListFilesResponse", "description": "Response from listing files in a vector store." }, - "OpenAIModel": { - "type": "object", - "properties": { - "id": { - "type": "string" - }, - "object": { - "type": "string", - "const": "model", - "default": "model" - }, - "created": { - "type": "integer" - }, - "owned_by": { - "type": "string" - } - }, - "additionalProperties": false, - "required": [ - "id", - "object", - "created", - "owned_by" - ], - "title": "OpenAIModel", - "description": "A model from OpenAI." - }, - "OpenAIListModelsResponse": { - "type": "object", - "properties": { - "data": { - "type": "array", - "items": { - "$ref": "#/components/schemas/OpenAIModel" - } - } - }, - "additionalProperties": false, - "required": [ - "data" - ], - "title": "OpenAIListModelsResponse" - }, "VectorStoreListResponse": { "type": "object", "properties": { diff --git a/docs/static/llama-stack-spec.yaml b/docs/static/llama-stack-spec.yaml index fe86b0ff0..f2a618b3a 100644 --- a/docs/static/llama-stack-spec.yaml +++ b/docs/static/llama-stack-spec.yaml @@ -367,87 +367,6 @@ paths: schema: $ref: '#/components/schemas/CreateOpenaiResponseRequest' required: true - /v1/openai/v1/responses: - get: - responses: - '200': - description: A ListOpenAIResponseObject. - content: - application/json: - schema: - $ref: '#/components/schemas/ListOpenAIResponseObject' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Agents - summary: List all OpenAI responses. - description: List all OpenAI responses. - parameters: - - name: after - in: query - description: The ID of the last response to return. - required: false - schema: - type: string - - name: limit - in: query - description: The number of responses to return. 
- required: false - schema: - type: integer - - name: model - in: query - description: The model to filter responses by. - required: false - schema: - type: string - - name: order - in: query - description: >- - The order to sort responses by when sorted by created_at ('asc' or 'desc'). - required: false - schema: - $ref: '#/components/schemas/Order' - post: - responses: - '200': - description: An OpenAIResponseObject. - content: - application/json: - schema: - $ref: '#/components/schemas/OpenAIResponseObject' - text/event-stream: - schema: - $ref: '#/components/schemas/OpenAIResponseObjectStream' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Agents - summary: Create a new OpenAI response. - description: Create a new OpenAI response. - parameters: [] - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/CreateOpenaiResponseRequest' - required: true /v1/prompts: get: responses: @@ -699,66 +618,6 @@ paths: required: true schema: type: string - /v1/openai/v1/responses/{response_id}: - get: - responses: - '200': - description: An OpenAIResponseObject. - content: - application/json: - schema: - $ref: '#/components/schemas/OpenAIResponseObject' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Agents - summary: Retrieve an OpenAI response by its ID. - description: Retrieve an OpenAI response by its ID. - parameters: - - name: response_id - in: path - description: >- - The ID of the OpenAI response to retrieve. - required: true - schema: - type: string - delete: - responses: - '200': - description: An OpenAIDeleteResponseObject - content: - application/json: - schema: - $ref: '#/components/schemas/OpenAIDeleteResponseObject' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Agents - summary: Delete an OpenAI response by its ID. - description: Delete an OpenAI response by its ID. - parameters: - - name: response_id - in: path - description: The ID of the OpenAI response to delete. - required: true - schema: - type: string /v1/prompts/{prompt_id}: get: responses: @@ -1169,36 +1028,6 @@ paths: required: true schema: type: string - /v1/openai/v1/chat/completions/{completion_id}: - get: - responses: - '200': - description: A OpenAICompletionWithInputMessages. - content: - application/json: - schema: - $ref: '#/components/schemas/OpenAICompletionWithInputMessages' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Inference - summary: Describe a chat completion by its ID. - description: Describe a chat completion by its ID. - parameters: - - name: completion_id - in: path - description: ID of the chat completion. 
- required: true - schema: - type: string /v1/datasets/{dataset_id}: get: responses: @@ -2482,93 +2311,6 @@ paths: schema: $ref: '#/components/schemas/OpenaiChatCompletionRequest' required: true - /v1/openai/v1/chat/completions: - get: - responses: - '200': - description: A ListOpenAIChatCompletionResponse. - content: - application/json: - schema: - $ref: '#/components/schemas/ListOpenAIChatCompletionResponse' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Inference - summary: List all chat completions. - description: List all chat completions. - parameters: - - name: after - in: query - description: >- - The ID of the last chat completion to return. - required: false - schema: - type: string - - name: limit - in: query - description: >- - The maximum number of chat completions to return. - required: false - schema: - type: integer - - name: model - in: query - description: The model to filter by. - required: false - schema: - type: string - - name: order - in: query - description: >- - The order to sort the chat completions by: "asc" or "desc". Defaults to - "desc". - required: false - schema: - $ref: '#/components/schemas/Order' - post: - responses: - '200': - description: An OpenAIChatCompletion. - content: - application/json: - schema: - oneOf: - - $ref: '#/components/schemas/OpenAIChatCompletion' - - $ref: '#/components/schemas/OpenAIChatCompletionChunk' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Inference - summary: >- - Generate an OpenAI-compatible chat completion for the given messages using - the specified model. - description: >- - Generate an OpenAI-compatible chat completion for the given messages using - the specified model. - parameters: [] - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/OpenaiChatCompletionRequest' - required: true /v1/datasets: get: responses: @@ -2746,77 +2488,6 @@ paths: required: false schema: $ref: '#/components/schemas/Order' - /v1/openai/v1/responses/{response_id}/input_items: - get: - responses: - '200': - description: An ListOpenAIResponseInputItem. - content: - application/json: - schema: - $ref: '#/components/schemas/ListOpenAIResponseInputItem' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Agents - summary: >- - List input items for a given OpenAI response. - description: >- - List input items for a given OpenAI response. - parameters: - - name: response_id - in: path - description: >- - The ID of the response to retrieve input items for. - required: true - schema: - type: string - - name: after - in: query - description: >- - An item ID to list items after, used for pagination. - required: false - schema: - type: string - - name: before - in: query - description: >- - An item ID to list items before, used for pagination. - required: false - schema: - type: string - - name: include - in: query - description: >- - Additional fields to include in the response. 
- required: false - schema: - type: array - items: - type: string - - name: limit - in: query - description: >- - A limit on the number of objects to be returned. Limit can range between - 1 and 100, and the default is 20. - required: false - schema: - type: integer - - name: order - in: query - description: >- - The order to return the input items in. Default is desc. - required: false - schema: - $ref: '#/components/schemas/Order' /v1/prompts/{prompt_id}/versions: get: responses: @@ -3309,115 +2980,6 @@ paths: schema: $ref: '#/components/schemas/OpenaiAttachFileToVectorStoreRequest' required: true - /v1/openai/v1/vector_stores/{vector_store_id}/files: - get: - responses: - '200': - description: >- - A VectorStoreListFilesResponse containing the list of files. - content: - application/json: - schema: - $ref: '#/components/schemas/VectorStoreListFilesResponse' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - VectorIO - summary: List files in a vector store. - description: List files in a vector store. - parameters: - - name: vector_store_id - in: path - description: >- - The ID of the vector store to list files from. - required: true - schema: - type: string - - name: limit - in: query - description: >- - (Optional) A limit on the number of objects to be returned. Limit can - range between 1 and 100, and the default is 20. - required: false - schema: - type: integer - - name: order - in: query - description: >- - (Optional) Sort order by the `created_at` timestamp of the objects. `asc` - for ascending order and `desc` for descending order. - required: false - schema: - type: string - - name: after - in: query - description: >- - (Optional) A cursor for use in pagination. `after` is an object ID that - defines your place in the list. - required: false - schema: - type: string - - name: before - in: query - description: >- - (Optional) A cursor for use in pagination. `before` is an object ID that - defines your place in the list. - required: false - schema: - type: string - - name: filter - in: query - description: >- - (Optional) Filter by file status to only return files with the specified - status. - required: false - schema: - $ref: '#/components/schemas/VectorStoreFileStatus' - post: - responses: - '200': - description: >- - A VectorStoreFileObject representing the attached file. - content: - application/json: - schema: - $ref: '#/components/schemas/VectorStoreFileObject' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - VectorIO - summary: Attach a file to a vector store. - description: Attach a file to a vector store. - parameters: - - name: vector_store_id - in: path - description: >- - The ID of the vector store to attach the file to. - required: true - schema: - type: string - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/OpenaiAttachFileToVectorStoreRequest' - required: true /v1/completions: post: responses: @@ -3452,40 +3014,6 @@ paths: schema: $ref: '#/components/schemas/OpenaiCompletionRequest' required: true - /v1/openai/v1/completions: - post: - responses: - '200': - description: An OpenAICompletion. 
- content: - application/json: - schema: - $ref: '#/components/schemas/OpenAICompletion' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Inference - summary: >- - Generate an OpenAI-compatible completion for the given prompt using the specified - model. - description: >- - Generate an OpenAI-compatible completion for the given prompt using the specified - model. - parameters: [] - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/OpenaiCompletionRequest' - required: true /v1/vector_stores: get: responses: @@ -3573,93 +3101,6 @@ paths: schema: $ref: '#/components/schemas/OpenaiCreateVectorStoreRequest' required: true - /v1/openai/v1/vector_stores: - get: - responses: - '200': - description: >- - A VectorStoreListResponse containing the list of vector stores. - content: - application/json: - schema: - $ref: '#/components/schemas/VectorStoreListResponse' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - VectorIO - summary: Returns a list of vector stores. - description: Returns a list of vector stores. - parameters: - - name: limit - in: query - description: >- - A limit on the number of objects to be returned. Limit can range between - 1 and 100, and the default is 20. - required: false - schema: - type: integer - - name: order - in: query - description: >- - Sort order by the `created_at` timestamp of the objects. `asc` for ascending - order and `desc` for descending order. - required: false - schema: - type: string - - name: after - in: query - description: >- - A cursor for use in pagination. `after` is an object ID that defines your - place in the list. - required: false - schema: - type: string - - name: before - in: query - description: >- - A cursor for use in pagination. `before` is an object ID that defines - your place in the list. - required: false - schema: - type: string - post: - responses: - '200': - description: >- - A VectorStoreObject representing the created vector store. - content: - application/json: - schema: - $ref: '#/components/schemas/VectorStoreObject' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - VectorIO - summary: Creates a vector store. - description: Creates a vector store. - parameters: [] - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/OpenaiCreateVectorStoreRequest' - required: true /v1/files/{file_id}: get: responses: @@ -3725,71 +3166,6 @@ paths: required: true schema: type: string - /v1/openai/v1/files/{file_id}: - get: - responses: - '200': - description: >- - An OpenAIFileObject containing file information. 
- content: - application/json: - schema: - $ref: '#/components/schemas/OpenAIFileObject' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Files - summary: >- - Returns information about a specific file. - description: >- - Returns information about a specific file. - parameters: - - name: file_id - in: path - description: >- - The ID of the file to use for this request. - required: true - schema: - type: string - delete: - responses: - '200': - description: >- - An OpenAIFileDeleteResponse indicating successful deletion. - content: - application/json: - schema: - $ref: '#/components/schemas/OpenAIFileDeleteResponse' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Files - summary: Delete a file. - description: Delete a file. - parameters: - - name: file_id - in: path - description: >- - The ID of the file to use for this request. - required: true - schema: - type: string /v1/vector_stores/{vector_store_id}: get: responses: @@ -3887,103 +3263,6 @@ paths: required: true schema: type: string - /v1/openai/v1/vector_stores/{vector_store_id}: - get: - responses: - '200': - description: >- - A VectorStoreObject representing the vector store. - content: - application/json: - schema: - $ref: '#/components/schemas/VectorStoreObject' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - VectorIO - summary: Retrieves a vector store. - description: Retrieves a vector store. - parameters: - - name: vector_store_id - in: path - description: The ID of the vector store to retrieve. - required: true - schema: - type: string - post: - responses: - '200': - description: >- - A VectorStoreObject representing the updated vector store. - content: - application/json: - schema: - $ref: '#/components/schemas/VectorStoreObject' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - VectorIO - summary: Updates a vector store. - description: Updates a vector store. - parameters: - - name: vector_store_id - in: path - description: The ID of the vector store to update. - required: true - schema: - type: string - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/OpenaiUpdateVectorStoreRequest' - required: true - delete: - responses: - '200': - description: >- - A VectorStoreDeleteResponse indicating the deletion status. - content: - application/json: - schema: - $ref: '#/components/schemas/VectorStoreDeleteResponse' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - VectorIO - summary: Delete a vector store. - description: Delete a vector store. 
- parameters: - - name: vector_store_id - in: path - description: The ID of the vector store to delete. - required: true - schema: - type: string /v1/vector_stores/{vector_store_id}/files/{file_id}: get: responses: @@ -4102,124 +3381,6 @@ paths: required: true schema: type: string - /v1/openai/v1/vector_stores/{vector_store_id}/files/{file_id}: - get: - responses: - '200': - description: >- - A VectorStoreFileObject representing the file. - content: - application/json: - schema: - $ref: '#/components/schemas/VectorStoreFileObject' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - VectorIO - summary: Retrieves a vector store file. - description: Retrieves a vector store file. - parameters: - - name: vector_store_id - in: path - description: >- - The ID of the vector store containing the file to retrieve. - required: true - schema: - type: string - - name: file_id - in: path - description: The ID of the file to retrieve. - required: true - schema: - type: string - post: - responses: - '200': - description: >- - A VectorStoreFileObject representing the updated file. - content: - application/json: - schema: - $ref: '#/components/schemas/VectorStoreFileObject' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - VectorIO - summary: Updates a vector store file. - description: Updates a vector store file. - parameters: - - name: vector_store_id - in: path - description: >- - The ID of the vector store containing the file to update. - required: true - schema: - type: string - - name: file_id - in: path - description: The ID of the file to update. - required: true - schema: - type: string - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/OpenaiUpdateVectorStoreFileRequest' - required: true - delete: - responses: - '200': - description: >- - A VectorStoreFileDeleteResponse indicating the deletion status. - content: - application/json: - schema: - $ref: '#/components/schemas/VectorStoreFileDeleteResponse' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - VectorIO - summary: Delete a vector store file. - description: Delete a vector store file. - parameters: - - name: vector_store_id - in: path - description: >- - The ID of the vector store containing the file to delete. - required: true - schema: - type: string - - name: file_id - in: path - description: The ID of the file to delete. - required: true - schema: - type: string /v1/embeddings: post: responses: @@ -4255,41 +3416,6 @@ paths: schema: $ref: '#/components/schemas/OpenaiEmbeddingsRequest' required: true - /v1/openai/v1/embeddings: - post: - responses: - '200': - description: >- - An OpenAIEmbeddingsResponse containing the embeddings. 
- content: - application/json: - schema: - $ref: '#/components/schemas/OpenAIEmbeddingsResponse' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Inference - summary: >- - Generate OpenAI-compatible embeddings for the given input using the specified - model. - description: >- - Generate OpenAI-compatible embeddings for the given input using the specified - model. - parameters: [] - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/OpenaiEmbeddingsRequest' - required: true /v1/files: get: responses: @@ -4401,141 +3527,6 @@ paths: - file - purpose required: true - /v1/openai/v1/files: - get: - responses: - '200': - description: >- - An ListOpenAIFileResponse containing the list of files. - content: - application/json: - schema: - $ref: '#/components/schemas/ListOpenAIFileResponse' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Files - summary: >- - Returns a list of files that belong to the user's organization. - description: >- - Returns a list of files that belong to the user's organization. - parameters: - - name: after - in: query - description: >- - A cursor for use in pagination. `after` is an object ID that defines your - place in the list. For instance, if you make a list request and receive - 100 objects, ending with obj_foo, your subsequent call can include after=obj_foo - in order to fetch the next page of the list. - required: false - schema: - type: string - - name: limit - in: query - description: >- - A limit on the number of objects to be returned. Limit can range between - 1 and 10,000, and the default is 10,000. - required: false - schema: - type: integer - - name: order - in: query - description: >- - Sort order by the `created_at` timestamp of the objects. `asc` for ascending - order and `desc` for descending order. - required: false - schema: - $ref: '#/components/schemas/Order' - - name: purpose - in: query - description: >- - Only return files with the given purpose. - required: false - schema: - $ref: '#/components/schemas/OpenAIFilePurpose' - post: - responses: - '200': - description: >- - An OpenAIFileObject representing the uploaded file. - content: - application/json: - schema: - $ref: '#/components/schemas/OpenAIFileObject' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Files - summary: >- - Upload a file that can be used across various endpoints. - description: >- - Upload a file that can be used across various endpoints. - - The file upload should be a multipart form request with: - - - file: The File object (not file name) to be uploaded. - - - purpose: The intended purpose of the uploaded file. - - - expires_after: Optional form values describing expiration for the file. 
- parameters: [] - requestBody: - content: - multipart/form-data: - schema: - type: object - properties: - file: - type: string - format: binary - purpose: - $ref: '#/components/schemas/OpenAIFilePurpose' - expires_after: - $ref: '#/components/schemas/ExpiresAfter' - required: - - file - - purpose - required: true - /v1/openai/v1/models: - get: - responses: - '200': - description: A OpenAIListModelsResponse. - content: - application/json: - schema: - $ref: '#/components/schemas/OpenAIListModelsResponse' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Models - summary: List models using the OpenAI API. - description: List models using the OpenAI API. - parameters: [] /v1/files/{file_id}/content: get: responses: @@ -4570,40 +3561,6 @@ paths: required: true schema: type: string - /v1/openai/v1/files/{file_id}/content: - get: - responses: - '200': - description: >- - The raw file content as a binary response. - content: - application/json: - schema: - $ref: '#/components/schemas/Response' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Files - summary: >- - Returns the contents of the specified file. - description: >- - Returns the contents of the specified file. - parameters: - - name: file_id - in: path - description: >- - The ID of the file to use for this request. - required: true - schema: - type: string /v1/vector_stores/{vector_store_id}/files/{file_id}/content: get: responses: @@ -4644,46 +3601,6 @@ paths: required: true schema: type: string - /v1/openai/v1/vector_stores/{vector_store_id}/files/{file_id}/content: - get: - responses: - '200': - description: >- - A list of InterleavedContent representing the file contents. - content: - application/json: - schema: - $ref: '#/components/schemas/VectorStoreFileContentsResponse' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - VectorIO - summary: >- - Retrieves the contents of a vector store file. - description: >- - Retrieves the contents of a vector store file. - parameters: - - name: vector_store_id - in: path - description: >- - The ID of the vector store containing the file to retrieve. - required: true - schema: - type: string - - name: file_id - in: path - description: The ID of the file to retrieve. - required: true - schema: - type: string /v1/vector_stores/{vector_store_id}/search: post: responses: @@ -4725,47 +3642,6 @@ paths: schema: $ref: '#/components/schemas/OpenaiSearchVectorStoreRequest' required: true - /v1/openai/v1/vector_stores/{vector_store_id}/search: - post: - responses: - '200': - description: >- - A VectorStoreSearchResponse containing the search results. 
- content: - application/json: - schema: - $ref: '#/components/schemas/VectorStoreSearchResponsePage' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - VectorIO - summary: Search for chunks in a vector store. - description: >- - Search for chunks in a vector store. - - Searches a vector store for relevant chunks based on a query and optional - file attribute filters. - parameters: - - name: vector_store_id - in: path - description: The ID of the vector store to search. - required: true - schema: - type: string - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/OpenaiSearchVectorStoreRequest' - required: true /v1alpha/post-training/preference-optimize: post: responses: @@ -5185,38 +4061,6 @@ paths: schema: $ref: '#/components/schemas/RunModerationRequest' required: true - /v1/openai/v1/moderations: - post: - responses: - '200': - description: A moderation object. - content: - application/json: - schema: - $ref: '#/components/schemas/ModerationObject' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Safety - summary: >- - Classifies if text and/or image inputs are potentially harmful. - description: >- - Classifies if text and/or image inputs are potentially harmful. - parameters: [] - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/RunModerationRequest' - required: true /v1/safety/run-shield: post: responses: @@ -12817,38 +11661,6 @@ components: title: VectorStoreListFilesResponse description: >- Response from listing files in a vector store. - OpenAIModel: - type: object - properties: - id: - type: string - object: - type: string - const: model - default: model - created: - type: integer - owned_by: - type: string - additionalProperties: false - required: - - id - - object - - created - - owned_by - title: OpenAIModel - description: A model from OpenAI. - OpenAIListModelsResponse: - type: object - properties: - data: - type: array - items: - $ref: '#/components/schemas/OpenAIModel' - additionalProperties: false - required: - - data - title: OpenAIListModelsResponse VectorStoreListResponse: type: object properties: diff --git a/llama_stack/apis/agents/agents.py b/llama_stack/apis/agents/agents.py index de420be5d..e8d0c467a 100644 --- a/llama_stack/apis/agents/agents.py +++ b/llama_stack/apis/agents/agents.py @@ -694,7 +694,6 @@ class Agents(Protocol): # # Both of these APIs are inherently stateful. - @webmethod(route="/openai/v1/responses/{response_id}", method="GET", level=LLAMA_STACK_API_V1) @webmethod(route="/responses/{response_id}", method="GET", level=LLAMA_STACK_API_V1) async def get_openai_response( self, @@ -707,7 +706,6 @@ class Agents(Protocol): """ ... - @webmethod(route="/openai/v1/responses", method="POST", level=LLAMA_STACK_API_V1) @webmethod(route="/responses", method="POST", level=LLAMA_STACK_API_V1) async def create_openai_response( self, @@ -733,7 +731,6 @@ class Agents(Protocol): """ ... 
- @webmethod(route="/openai/v1/responses", method="GET", level=LLAMA_STACK_API_V1) @webmethod(route="/responses", method="GET", level=LLAMA_STACK_API_V1) async def list_openai_responses( self, @@ -752,7 +749,6 @@ class Agents(Protocol): """ ... - @webmethod(route="/openai/v1/responses/{response_id}/input_items", method="GET", level=LLAMA_STACK_API_V1) @webmethod(route="/responses/{response_id}/input_items", method="GET", level=LLAMA_STACK_API_V1) async def list_openai_response_input_items( self, @@ -775,7 +771,6 @@ class Agents(Protocol): """ ... - @webmethod(route="/openai/v1/responses/{response_id}", method="DELETE", level=LLAMA_STACK_API_V1) @webmethod(route="/responses/{response_id}", method="DELETE", level=LLAMA_STACK_API_V1) async def delete_openai_response(self, response_id: str) -> OpenAIDeleteResponseObject: """Delete an OpenAI response by its ID. diff --git a/llama_stack/apis/batches/batches.py b/llama_stack/apis/batches/batches.py index 1a64257e3..1ee9fdb15 100644 --- a/llama_stack/apis/batches/batches.py +++ b/llama_stack/apis/batches/batches.py @@ -43,7 +43,6 @@ class Batches(Protocol): Note: This API is currently under active development and may undergo changes. """ - @webmethod(route="/openai/v1/batches", method="POST", level=LLAMA_STACK_API_V1) @webmethod(route="/batches", method="POST", level=LLAMA_STACK_API_V1) async def create_batch( self, @@ -64,7 +63,6 @@ class Batches(Protocol): """ ... - @webmethod(route="/openai/v1/batches/{batch_id}", method="GET", level=LLAMA_STACK_API_V1) @webmethod(route="/batches/{batch_id}", method="GET", level=LLAMA_STACK_API_V1) async def retrieve_batch(self, batch_id: str) -> BatchObject: """Retrieve information about a specific batch. @@ -74,7 +72,6 @@ class Batches(Protocol): """ ... - @webmethod(route="/openai/v1/batches/{batch_id}/cancel", method="POST", level=LLAMA_STACK_API_V1) @webmethod(route="/batches/{batch_id}/cancel", method="POST", level=LLAMA_STACK_API_V1) async def cancel_batch(self, batch_id: str) -> BatchObject: """Cancel a batch that is in progress. @@ -84,7 +81,6 @@ class Batches(Protocol): """ ... - @webmethod(route="/openai/v1/batches", method="GET", level=LLAMA_STACK_API_V1) @webmethod(route="/batches", method="GET", level=LLAMA_STACK_API_V1) async def list_batches( self, diff --git a/llama_stack/apis/files/files.py b/llama_stack/apis/files/files.py index e4cf6283a..0cc491fae 100644 --- a/llama_stack/apis/files/files.py +++ b/llama_stack/apis/files/files.py @@ -105,7 +105,6 @@ class OpenAIFileDeleteResponse(BaseModel): @trace_protocol class Files(Protocol): # OpenAI Files API Endpoints - @webmethod(route="/openai/v1/files", method="POST", level=LLAMA_STACK_API_V1) @webmethod(route="/files", method="POST", level=LLAMA_STACK_API_V1) async def openai_upload_file( self, @@ -128,7 +127,6 @@ class Files(Protocol): """ ... - @webmethod(route="/openai/v1/files", method="GET", level=LLAMA_STACK_API_V1) @webmethod(route="/files", method="GET", level=LLAMA_STACK_API_V1) async def openai_list_files( self, @@ -148,7 +146,6 @@ class Files(Protocol): """ ... - @webmethod(route="/openai/v1/files/{file_id}", method="GET", level=LLAMA_STACK_API_V1) @webmethod(route="/files/{file_id}", method="GET", level=LLAMA_STACK_API_V1) async def openai_retrieve_file( self, @@ -162,7 +159,6 @@ class Files(Protocol): """ ... 
- @webmethod(route="/openai/v1/files/{file_id}", method="DELETE", level=LLAMA_STACK_API_V1) @webmethod(route="/files/{file_id}", method="DELETE", level=LLAMA_STACK_API_V1) async def openai_delete_file( self, @@ -176,7 +172,6 @@ class Files(Protocol): """ ... - @webmethod(route="/openai/v1/files/{file_id}/content", method="GET", level=LLAMA_STACK_API_V1) @webmethod(route="/files/{file_id}/content", method="GET", level=LLAMA_STACK_API_V1) async def openai_retrieve_file_content( self, diff --git a/llama_stack/apis/inference/inference.py b/llama_stack/apis/inference/inference.py index 29b014a11..f8611b224 100644 --- a/llama_stack/apis/inference/inference.py +++ b/llama_stack/apis/inference/inference.py @@ -1089,7 +1089,6 @@ class InferenceProvider(Protocol): raise NotImplementedError("Reranking is not implemented") return # this is so mypy's safe-super rule will consider the method concrete - @webmethod(route="/openai/v1/completions", method="POST", level=LLAMA_STACK_API_V1) @webmethod(route="/completions", method="POST", level=LLAMA_STACK_API_V1) async def openai_completion( self, @@ -1141,7 +1140,6 @@ class InferenceProvider(Protocol): """ ... - @webmethod(route="/openai/v1/chat/completions", method="POST", level=LLAMA_STACK_API_V1) @webmethod(route="/chat/completions", method="POST", level=LLAMA_STACK_API_V1) async def openai_chat_completion( self, @@ -1198,7 +1196,6 @@ class InferenceProvider(Protocol): """ ... - @webmethod(route="/openai/v1/embeddings", method="POST", level=LLAMA_STACK_API_V1) @webmethod(route="/embeddings", method="POST", level=LLAMA_STACK_API_V1) async def openai_embeddings( self, @@ -1228,7 +1225,6 @@ class Inference(InferenceProvider): - Embedding models: these models generate embeddings to be used for semantic search. """ - @webmethod(route="/openai/v1/chat/completions", method="GET", level=LLAMA_STACK_API_V1) @webmethod(route="/chat/completions", method="GET", level=LLAMA_STACK_API_V1) async def list_chat_completions( self, @@ -1247,7 +1243,6 @@ class Inference(InferenceProvider): """ raise NotImplementedError("List chat completions is not implemented") - @webmethod(route="/openai/v1/chat/completions/{completion_id}", method="GET", level=LLAMA_STACK_API_V1) @webmethod(route="/chat/completions/{completion_id}", method="GET", level=LLAMA_STACK_API_V1) async def get_chat_completion(self, completion_id: str) -> OpenAICompletionWithInputMessages: """Describe a chat completion by its ID. diff --git a/llama_stack/apis/models/models.py b/llama_stack/apis/models/models.py index a4f6a888b..d8860654b 100644 --- a/llama_stack/apis/models/models.py +++ b/llama_stack/apis/models/models.py @@ -111,14 +111,6 @@ class Models(Protocol): """ ... - @webmethod(route="/openai/v1/models", method="GET", level=LLAMA_STACK_API_V1) - async def openai_list_models(self) -> OpenAIListModelsResponse: - """List models using the OpenAI API. - - :returns: A OpenAIListModelsResponse. - """ - ... - @webmethod(route="/models/{model_id:path}", method="GET", level=LLAMA_STACK_API_V1) async def get_model( self, diff --git a/llama_stack/apis/safety/safety.py b/llama_stack/apis/safety/safety.py index d9ef6b2a1..bf37b496a 100644 --- a/llama_stack/apis/safety/safety.py +++ b/llama_stack/apis/safety/safety.py @@ -114,7 +114,6 @@ class Safety(Protocol): """ ... 
- @webmethod(route="/openai/v1/moderations", method="POST", level=LLAMA_STACK_API_V1) @webmethod(route="/moderations", method="POST", level=LLAMA_STACK_API_V1) async def run_moderation(self, input: str | list[str], model: str) -> ModerationObject: """Classifies if text and/or image inputs are potentially harmful. diff --git a/llama_stack/apis/vector_io/vector_io.py b/llama_stack/apis/vector_io/vector_io.py index dfd93e481..cea2a6917 100644 --- a/llama_stack/apis/vector_io/vector_io.py +++ b/llama_stack/apis/vector_io/vector_io.py @@ -473,7 +473,6 @@ class VectorIO(Protocol): ... # OpenAI Vector Stores API endpoints - @webmethod(route="/openai/v1/vector_stores", method="POST", level=LLAMA_STACK_API_V1) @webmethod(route="/vector_stores", method="POST", level=LLAMA_STACK_API_V1) async def openai_create_vector_store( self, @@ -500,7 +499,6 @@ class VectorIO(Protocol): """ ... - @webmethod(route="/openai/v1/vector_stores", method="GET", level=LLAMA_STACK_API_V1) @webmethod(route="/vector_stores", method="GET", level=LLAMA_STACK_API_V1) async def openai_list_vector_stores( self, @@ -519,7 +517,6 @@ class VectorIO(Protocol): """ ... - @webmethod(route="/openai/v1/vector_stores/{vector_store_id}", method="GET", level=LLAMA_STACK_API_V1) @webmethod(route="/vector_stores/{vector_store_id}", method="GET", level=LLAMA_STACK_API_V1) async def openai_retrieve_vector_store( self, @@ -532,7 +529,6 @@ class VectorIO(Protocol): """ ... - @webmethod(route="/openai/v1/vector_stores/{vector_store_id}", method="POST", level=LLAMA_STACK_API_V1) @webmethod(route="/vector_stores/{vector_store_id}", method="POST", level=LLAMA_STACK_API_V1) async def openai_update_vector_store( self, @@ -551,7 +547,6 @@ class VectorIO(Protocol): """ ... - @webmethod(route="/openai/v1/vector_stores/{vector_store_id}", method="DELETE", level=LLAMA_STACK_API_V1) @webmethod(route="/vector_stores/{vector_store_id}", method="DELETE", level=LLAMA_STACK_API_V1) async def openai_delete_vector_store( self, @@ -564,7 +559,6 @@ class VectorIO(Protocol): """ ... - @webmethod(route="/openai/v1/vector_stores/{vector_store_id}/search", method="POST", level=LLAMA_STACK_API_V1) @webmethod(route="/vector_stores/{vector_store_id}/search", method="POST", level=LLAMA_STACK_API_V1) async def openai_search_vector_store( self, @@ -591,7 +585,6 @@ class VectorIO(Protocol): """ ... - @webmethod(route="/openai/v1/vector_stores/{vector_store_id}/files", method="POST", level=LLAMA_STACK_API_V1) @webmethod(route="/vector_stores/{vector_store_id}/files", method="POST", level=LLAMA_STACK_API_V1) async def openai_attach_file_to_vector_store( self, @@ -610,7 +603,6 @@ class VectorIO(Protocol): """ ... - @webmethod(route="/openai/v1/vector_stores/{vector_store_id}/files", method="GET", level=LLAMA_STACK_API_V1) @webmethod(route="/vector_stores/{vector_store_id}/files", method="GET", level=LLAMA_STACK_API_V1) async def openai_list_files_in_vector_store( self, @@ -633,9 +625,6 @@ class VectorIO(Protocol): """ ... - @webmethod( - route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}", method="GET", level=LLAMA_STACK_API_V1 - ) @webmethod(route="/vector_stores/{vector_store_id}/files/{file_id}", method="GET", level=LLAMA_STACK_API_V1) async def openai_retrieve_vector_store_file( self, @@ -650,11 +639,6 @@ class VectorIO(Protocol): """ ... 
-    @webmethod(
-        route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}/content",
-        method="GET",
-        level=LLAMA_STACK_API_V1,
-    )
     @webmethod(
         route="/vector_stores/{vector_store_id}/files/{file_id}/content",
         method="GET",
@@ -673,9 +657,6 @@ class VectorIO(Protocol):
         """
         ...
 
-    @webmethod(
-        route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}", method="POST", level=LLAMA_STACK_API_V1
-    )
     @webmethod(route="/vector_stores/{vector_store_id}/files/{file_id}", method="POST", level=LLAMA_STACK_API_V1)
     async def openai_update_vector_store_file(
         self,
@@ -692,9 +673,6 @@ class VectorIO(Protocol):
         """
         ...
 
-    @webmethod(
-        route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}", method="DELETE", level=LLAMA_STACK_API_V1
-    )
     @webmethod(route="/vector_stores/{vector_store_id}/files/{file_id}", method="DELETE", level=LLAMA_STACK_API_V1)
     async def openai_delete_vector_store_file(
         self,

From 6cce553c930b2152cf7215ab2c00971ed0341c0d Mon Sep 17 00:00:00 2001
From: ehhuang
Date: Mon, 29 Sep 2025 23:11:41 -0700
Subject: [PATCH 12/13] fix: mcp tool with array type should include items
 (#3602)

# What does this PR do?
Fixes error:
```
[ERROR] Error executing endpoint route='/v1/openai/v1/responses' method='post': Error code: 400 - {'error': {'message': "Invalid schema for function 'pods_exec': In context=('properties', 'command'), array schema missing items.", 'type': 'invalid_request_error', 'param': 'tools[7].function.parameters', 'code': 'invalid_function_parameters'}}
```

From script:
```
#!/usr/bin/env python3
"""
Script to test Responses API with kubernetes-mcp-server.

This script:
1. Connects to the llama stack server
2. Uses the Responses API with MCP tools
3. Asks for the list of Kubernetes namespaces using the kubernetes-mcp-server
"""

import json

from openai import OpenAI

# Connect to the llama stack server
base_url = "http://localhost:8321/v1/openai/v1"
client = OpenAI(base_url=base_url, api_key="fake")

# Define the MCP tool pointing to the kubernetes-mcp-server
# The kubernetes-mcp-server is running on port 3000 with SSE endpoint at /sse
mcp_server_url = "http://localhost:3000/sse"

tools = [
    {
        "type": "mcp",
        "server_label": "k8s",
        "server_url": mcp_server_url,
    }
]

# Create a response request asking for k8s namespaces
print("Sending request to list Kubernetes namespaces...")
print(f"Using MCP server at: {mcp_server_url}")
print("Available tools will be listed automatically by the MCP server.")
print()

response = client.responses.create(
    # model="meta-llama/Llama-3.2-3B-Instruct",  # Using the vllm model
    model="openai/gpt-4o",
    input="what are all the Kubernetes namespaces? Use tool call to `namespaces_list`. make sure to adhere to the tool calling format.",
    tools=tools,
    stream=False,
)

print("\n" + "=" * 80)
print("RESPONSE OUTPUT:")
print("=" * 80)

# Print the output
for i, output in enumerate(response.output):
    print(f"\n[Output {i + 1}] Type: {output.type}")
    if output.type == "mcp_list_tools":
        print(f"  Server: {output.server_label}")
        print(f"  Tools available: {[t.name for t in output.tools]}")
    elif output.type == "mcp_call":
        print(f"  Tool called: {output.name}")
        print(f"  Arguments: {output.arguments}")
        print(f"  Result: {output.output}")
        if output.error:
            print(f"  Error: {output.error}")
    elif output.type == "message":
        print(f"  Role: {output.role}")
        print(f"  Content: {output.content}")

print("\n" + "=" * 80)
print("FINAL RESPONSE TEXT:")
print("=" * 80)
print(response.output_text)
```

## Test Plan

New unit tests added; the script above now runs successfully.
---
 .../meta_reference/responses/streaming.py     | 48 ++++++++++++-------
 tests/unit/providers/inline/__init__.py       |  5 ++
 .../unit/providers/inline/agents/__init__.py  |  5 ++
 .../inline/agents/meta_reference/__init__.py  |  5 ++
 .../meta_reference/responses/__init__.py      |  5 ++
 .../responses/test_streaming.py               | 42 ++++++++++++++++
 6 files changed, 93 insertions(+), 17 deletions(-)
 create mode 100644 tests/unit/providers/inline/__init__.py
 create mode 100644 tests/unit/providers/inline/agents/__init__.py
 create mode 100644 tests/unit/providers/inline/agents/meta_reference/__init__.py
 create mode 100644 tests/unit/providers/inline/agents/meta_reference/responses/__init__.py
 create mode 100644 tests/unit/providers/inline/agents/meta_reference/responses/test_streaming.py

diff --git a/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py b/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py
index 3e69fa5cd..2f45ad2a3 100644
--- a/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py
+++ b/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py
@@ -50,6 +50,36 @@ from .utils import convert_chat_choice_to_response_message, is_function_tool_cal
 
 logger = get_logger(name=__name__, category="agents::meta_reference")
 
+
+def convert_tooldef_to_chat_tool(tool_def):
+    """Convert a ToolDef to OpenAI ChatCompletionToolParam format.
+ + Args: + tool_def: ToolDef from the tools API + + Returns: + ChatCompletionToolParam suitable for OpenAI chat completion + """ + + from llama_stack.models.llama.datatypes import ToolDefinition, ToolParamDefinition + from llama_stack.providers.utils.inference.openai_compat import convert_tooldef_to_openai_tool + + internal_tool_def = ToolDefinition( + tool_name=tool_def.name, + description=tool_def.description, + parameters={ + param.name: ToolParamDefinition( + param_type=param.parameter_type, + description=param.description, + required=param.required, + default=param.default, + items=param.items, + ) + for param in tool_def.parameters + }, + ) + return convert_tooldef_to_openai_tool(internal_tool_def) + + class StreamingResponseOrchestrator: def __init__( self, @@ -556,23 +586,7 @@ class StreamingResponseOrchestrator: continue if not always_allowed or t.name in always_allowed: # Add to chat tools for inference - from llama_stack.models.llama.datatypes import ToolDefinition, ToolParamDefinition - from llama_stack.providers.utils.inference.openai_compat import convert_tooldef_to_openai_tool - - tool_def = ToolDefinition( - tool_name=t.name, - description=t.description, - parameters={ - param.name: ToolParamDefinition( - param_type=param.parameter_type, - description=param.description, - required=param.required, - default=param.default, - ) - for param in t.parameters - }, - ) - openai_tool = convert_tooldef_to_openai_tool(tool_def) + openai_tool = convert_tooldef_to_chat_tool(t) if self.ctx.chat_tools is None: self.ctx.chat_tools = [] self.ctx.chat_tools.append(openai_tool) diff --git a/tests/unit/providers/inline/__init__.py b/tests/unit/providers/inline/__init__.py new file mode 100644 index 000000000..756f351d8 --- /dev/null +++ b/tests/unit/providers/inline/__init__.py @@ -0,0 +1,5 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. diff --git a/tests/unit/providers/inline/agents/__init__.py b/tests/unit/providers/inline/agents/__init__.py new file mode 100644 index 000000000..756f351d8 --- /dev/null +++ b/tests/unit/providers/inline/agents/__init__.py @@ -0,0 +1,5 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. diff --git a/tests/unit/providers/inline/agents/meta_reference/__init__.py b/tests/unit/providers/inline/agents/meta_reference/__init__.py new file mode 100644 index 000000000..756f351d8 --- /dev/null +++ b/tests/unit/providers/inline/agents/meta_reference/__init__.py @@ -0,0 +1,5 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. diff --git a/tests/unit/providers/inline/agents/meta_reference/responses/__init__.py b/tests/unit/providers/inline/agents/meta_reference/responses/__init__.py new file mode 100644 index 000000000..756f351d8 --- /dev/null +++ b/tests/unit/providers/inline/agents/meta_reference/responses/__init__.py @@ -0,0 +1,5 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
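
For context before the unit test below: a minimal usage sketch of the new `convert_tooldef_to_chat_tool` helper, modeled on the failing `pods_exec` tool from the error above. The `{"type": "string"}` item schema and the parameter description are assumptions for illustration; the asserted result shape matches what the new unit test checks.

```python
from llama_stack.apis.tools import ToolDef, ToolParameter
from llama_stack.providers.inline.agents.meta_reference.responses.streaming import (
    convert_tooldef_to_chat_tool,
)

# Hypothetical MCP tool with an array parameter, modeled on the failing
# `pods_exec` tool; the {"type": "string"} item schema is an assumption.
pods_exec = ToolDef(
    name="pods_exec",
    description="Execute a command inside a pod",
    parameters=[
        ToolParameter(
            name="command",
            parameter_type="array",
            description="Command and arguments to execute",
            required=True,
            items={"type": "string"},
        )
    ],
)

chat_tool = convert_tooldef_to_chat_tool(pods_exec)

# With this fix, the array property keeps its item schema, so OpenAI no
# longer rejects the function parameters with "array schema missing items".
command = chat_tool["function"]["parameters"]["properties"]["command"]
assert command["type"] == "array"
assert command["items"] == {"type": "string"}
```
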
diff --git a/tests/unit/providers/inline/agents/meta_reference/responses/test_streaming.py b/tests/unit/providers/inline/agents/meta_reference/responses/test_streaming.py
new file mode 100644
index 000000000..6fda2b508
--- /dev/null
+++ b/tests/unit/providers/inline/agents/meta_reference/responses/test_streaming.py
@@ -0,0 +1,42 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from llama_stack.apis.tools import ToolDef, ToolParameter
+from llama_stack.providers.inline.agents.meta_reference.responses.streaming import (
+    convert_tooldef_to_chat_tool,
+)
+
+
+def test_convert_tooldef_to_chat_tool_preserves_items_field():
+    """Test that array parameters preserve the items field during conversion.
+
+    This test ensures that when converting a ToolDef with array-type parameters
+    to OpenAI ChatCompletionToolParam format, the 'items' field is preserved.
+    Without this fix, array parameters would be missing schema information about their items.
+    """
+    tool_def = ToolDef(
+        name="test_tool",
+        description="A test tool with array parameter",
+        parameters=[
+            ToolParameter(
+                name="tags",
+                parameter_type="array",
+                description="List of tags",
+                required=True,
+                items={"type": "string"},
+            )
+        ],
+    )
+
+    result = convert_tooldef_to_chat_tool(tool_def)
+
+    assert result["type"] == "function"
+    assert result["function"]["name"] == "test_tool"
+
+    tags_param = result["function"]["parameters"]["properties"]["tags"]
+    assert tags_param["type"] == "array"
+    assert "items" in tags_param, "items field should be preserved for array parameters"
+    assert tags_param["items"] == {"type": "string"}

From 0cc072dcafb3426894eb77231b4d90b3edd06196 Mon Sep 17 00:00:00 2001
From: Eric Huang
Date: Tue, 30 Sep 2025 11:24:27 -0700
Subject: [PATCH 13/13] fix: don't pass default response format in Responses

# What does this PR do?
Avoid sending a `response_format` of the default `"text"` type to chat completion: text is already the default, and some providers reject a non-empty `response_format` when tools are present.

## Test Plan

---
 .../inline/agents/meta_reference/responses/streaming.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py b/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py
index 2f45ad2a3..179f7f023 100644
--- a/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py
+++ b/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py
@@ -127,13 +127,16 @@ class StreamingResponseOrchestrator:
         messages = self.ctx.messages.copy()
 
         while True:
+            # Text is the default response format for chat completion, so we don't need to pass it
+            # (some providers don't support a non-empty response_format when tools are present)
+            response_format = None if self.ctx.response_format.type == "text" else self.ctx.response_format
             completion_result = await self.inference_api.openai_chat_completion(
                 model=self.ctx.model,
                 messages=messages,
                 tools=self.ctx.chat_tools,
                 stream=True,
                 temperature=self.ctx.temperature,
-                response_format=self.ctx.response_format,
+                response_format=response_format,
            )
 
             # Process streaming chunks and build complete response
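
To illustrate the PATCH 13 guard in isolation, a minimal sketch follows. The standalone `effective_response_format` function and the `SimpleNamespace` stand-ins for the orchestrator's response format are illustrative assumptions, not part of the codebase; the conditional itself mirrors the line added in the diff.

```python
from types import SimpleNamespace


def effective_response_format(response_format):
    # "text" is the chat-completion default, and some providers reject a
    # non-empty response_format when tools are present, so send None instead.
    return None if response_format.type == "text" else response_format


# The default "text" format is dropped; any other format passes through.
assert effective_response_format(SimpleNamespace(type="text")) is None
json_schema = SimpleNamespace(type="json_schema")
assert effective_response_format(json_schema) is json_schema
```
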