Merge branch 'main' into issue-3443-require_approval

2025-10-04 04:04:14 +00:00 · 2025-09-30 12:10:13 -04:00 · 2025-09-30 12:10:13 -04:00 · d2fdc70a8d
commit d2fdc70a8d
parent aea9ed33fc 6cce553c93
72 changed files with 1380 additions and 1406 deletions
--- a/docs/docs/providers/openai.mdx
+++ b/docs/docs/providers/openai.mdx
@ -7,7 +7,7 @@ sidebar_position: 1

 ### Server path

-Llama Stack exposes an OpenAI-compatible API endpoint at `/v1/openai/v1`. So, for a Llama Stack server running locally on port `8321`, the full url to the OpenAI-compatible API endpoint is `http://localhost:8321/v1/openai/v1`.
+Llama Stack exposes OpenAI-compatible API endpoints under `/v1`. So, for a Llama Stack server running locally on port `8321`, the base URL for the OpenAI-compatible API is `http://localhost:8321/v1`.

 ### Clients

@ -25,12 +25,12 @@ client = LlamaStackClient(base_url="http://localhost:8321")

 #### OpenAI Client

-When using an OpenAI client, set the `base_url` to the `/v1/openai/v1` path on your Llama Stack server.
+When using an OpenAI client, set the `base_url` to the `/v1` path on your Llama Stack server.

 ```python
 from openai import OpenAI

-client = OpenAI(base_url="http://localhost:8321/v1/openai/v1", api_key="none")
+client = OpenAI(base_url="http://localhost:8321/v1", api_key="none")
 ```

 Regardless of the client you choose, the following code examples should all work the same.
--- a/docs/docs/references/llama_cli_reference/index.md
+++ b/docs/docs/references/llama_cli_reference/index.md
@ -261,7 +261,7 @@ You can even run `llama model prompt-format` see all of the templates and their
 ```
 llama model prompt-format -m Llama3.2-3B-Instruct
 ```
-![alt text](../../../resources/prompt-format.png)
+![alt text](/img/prompt-format.png)


 You will be shown a Markdown formatted description of the model interface and how prompts / messages are formatted for various scenarios.
--- a/docs/docs/references/python_sdk_reference/index.md
+++ b/docs/docs/references/python_sdk_reference/index.md
@ -217,7 +217,6 @@ from llama_stack_client.types import (
 Methods:

 - <code title="post /v1/inference/chat-completion">client.inference.<a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/resources/inference.py">chat_completion</a>(\*\*<a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/types/inference_chat_completion_params.py">params</a>) -> <a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/types/inference_chat_completion_response.py">InferenceChatCompletionResponse</a></code>
- <code title="post /v1/inference/completion">client.inference.<a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/resources/inference.py">completion</a>(\*\*<a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/types/inference_completion_params.py">params</a>) -> <a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/types/inference_completion_response.py">InferenceCompletionResponse</a></code>
 - <code title="post /v1/inference/embeddings">client.inference.<a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/resources/inference.py">embeddings</a>(\*\*<a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/types/inference_embeddings_params.py">params</a>) -> <a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/types/embeddings_response.py">EmbeddingsResponse</a></code>

 ## VectorIo
--- a/docs/getting_started.ipynb
+++ b/docs/getting_started.ipynb
@ -824,16 +824,10 @@
        "\n",
        "\n",
        "user_input = \"Michael Jordan was born in 1963. He played basketball for the Chicago Bulls. He retired in 2003. Extract this information into JSON for me. \"\n",
-        "response = client.inference.completion(\n",
-        "    model_id=\"meta-llama/Llama-3.1-8B-Instruct\",\n",
-        "    content=user_input,\n",
-        "    stream=False,\n",
-        "    sampling_params={\n",
-        "        \"strategy\": {\n",
-        "            \"type\": \"greedy\",\n",
-        "        },\n",
-        "        \"max_tokens\": 50,\n",
-        "    },\n",
+        "response = client.chat.completions.create(\n",
+        "    model=\"meta-llama/Llama-3.1-8B-Instruct\",\n",
+        "    messages=[{\"role\": \"user\", \"content\": user_input}],\n",
+        "    max_tokens=50,\n",
        "    response_format={\n",
        "        \"type\": \"json_schema\",\n",
        "        \"json_schema\": Output.model_json_schema(),\n",
@ -1013,7 +1007,7 @@
        "\n",
        "\n",
        "\n",
-        "<img src=\"https://github.com/meta-llama/llama-stack/blob/main/docs/resources/agentic-system.png?raw=true\" alt=\"drawing\" width=\"800\"/>\n",
+        "<img src=\"https://github.com/meta-llama/llama-stack/blob/main/docs/static/img/agentic-system.png?raw=true\" alt=\"drawing\" width=\"800\"/>\n",
        "\n",
        "\n",
        "Agents are characterized by having access to\n",
--- a/docs/notebooks/nvidia/beginner_e2e/Llama_Stack_NVIDIA_E2E_Flow.ipynb
+++ b/docs/notebooks/nvidia/beginner_e2e/Llama_Stack_NVIDIA_E2E_Flow.ipynb
@ -706,20 +706,15 @@
    "    provider_id=\"nvidia\",\n",
    ")\n",
    "\n",
-    "response = client.inference.completion(\n",
-    "    content=\"Complete the sentence using one word: Roses are red, violets are \",\n",
+    "response = client.completions.create(\n",
+    "    prompt=\"Complete the sentence using one word: Roses are red, violets are \",\n",
    "    stream=False,\n",
-    "    model_id=CUSTOMIZED_MODEL_DIR,\n",
-    "    sampling_params={\n",
-    "        \"strategy\": {\n",
-    "            \"type\": \"top_p\",\n",
-    "            \"temperature\": 0.7,\n",
-    "            \"top_p\": 0.9\n",
-    "        },\n",
-    "        \"max_tokens\": 20,\n",
-    "    },\n",
+    "    model=CUSTOMIZED_MODEL_DIR,\n",
+    "    temperature=0.7,\n",
+    "    top_p=0.9,\n",
+    "    max_tokens=20,\n",
    ")\n",
-    "print(f\"Inference response: {response.content}\")"
+    "print(f\"Inference response: {response.choices[0].text}\")"
   ]
  },
  {
@ -1233,20 +1228,15 @@
    "    provider_id=\"nvidia\",\n",
    ")\n",
    "\n",
-    "response = client.inference.completion(\n",
-    "    content=\"Complete the sentence using one word: Roses are red, violets are \",\n",
+    "response = client.completions.create(\n",
+    "    prompt=\"Complete the sentence using one word: Roses are red, violets are \",\n",
    "    stream=False,\n",
-    "    model_id=customized_chat_model_dir,\n",
-    "    sampling_params={\n",
-    "        \"strategy\": {\n",
-    "            \"type\": \"top_p\",\n",
-    "            \"temperature\": 0.7,\n",
-    "            \"top_p\": 0.9\n",
-    "        },\n",
-    "        \"max_tokens\": 20,\n",
-    "    },\n",
+    "    model=customized_chat_model_dir,\n",
+    "    temperature=0.7,\n",
+    "    top_p=0.9,\n",
+    "    max_tokens=20,\n",
    ")\n",
-    "print(f\"Inference response: {response.content}\")"
+    "print(f\"Inference response: {response.choices[0].text}\")"
   ]
  },
  {
--- a/docs/openapi_generator/pyopenapi/generator.py
+++ b/docs/openapi_generator/pyopenapi/generator.py
@ -5,6 +5,7 @@
 # the root directory of this source tree.

 import hashlib
+import inspect
 import ipaddress
 import types
 import typing
@ -12,6 +13,7 @@ from dataclasses import make_dataclass
 from typing import Annotated, Any, Dict, get_args, get_origin, Set, Union

 from fastapi import UploadFile
+from pydantic import BaseModel

 from llama_stack.apis.datatypes import Error
 from llama_stack.strong_typing.core import JsonType
@ -632,14 +634,22 @@ class Generator:
                    base_type = get_args(param_type)[0]
                else:
                    base_type = param_type
+
+                # Check if the type is optional
+                is_optional = is_type_optional(base_type)
+                if is_optional:
+                    base_type = unwrap_optional_type(base_type)
+
                if base_type is UploadFile:
                    # File upload
                    properties[name] = {"type": "string", "format": "binary"}
                else:
-                    # Form field
+                    # All other types - generate schema reference
+                    # This includes enums, BaseModels, and simple types
                    properties[name] = self.schema_builder.classdef_to_ref(base_type)

-                required_fields.append(name)
+                if not is_optional:
+                    required_fields.append(name)

            multipart_schema = {
                "type": "object",
--- a/docs/static/img/agentic-system.png
+++ b/docs/static/img/agentic-system.png
--- a/docs/static/img/list-templates.png
+++ b/docs/static/img/list-templates.png
--- a/docs/static/img/llama-stack.png
+++ b/docs/static/img/llama-stack.png
--- a/docs/static/img/model-lifecycle.png
+++ b/docs/static/img/model-lifecycle.png
--- a/docs/static/img/prompt-format.png
+++ b/docs/static/img/prompt-format.png
--- a/docs/static/llama-stack-spec.html
+++ b/docs/static/llama-stack-spec.html
--- a/docs/static/llama-stack-spec.yaml
+++ b/docs/static/llama-stack-spec.yaml
@ -286,7 +286,7 @@ paths:
            schema:
              $ref: '#/components/schemas/CreateAgentTurnRequest'
        required: true
-  /v1/openai/v1/responses:
+  /v1/responses:
    get:
      responses:
        '200':
@ -558,7 +558,7 @@ paths:
          required: true
          schema:
            type: string
-  /v1/openai/v1/responses/{response_id}:
+  /v1/responses/{response_id}:
    get:
      responses:
        '200':
@ -720,41 +720,6 @@ paths:
          required: true
          schema:
            type: string
-  /v1/inference/embeddings:
-    post:
-      responses:
-        '200':
-          description: >-
-            An array of embeddings, one for each content. Each embedding is a list
-            of floats. The dimensionality of the embedding is model-specific; you
-            can check model metadata using /models/{model_id}.
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/EmbeddingsResponse'
-        '400':
-          $ref: '#/components/responses/BadRequest400'
-        '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
-        '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
-        default:
-          $ref: '#/components/responses/DefaultError'
-      tags:
-        - Inference
-      summary: >-
-        Generate embeddings for content pieces using the specified model.
-      description: >-
-        Generate embeddings for content pieces using the specified model.
-      parameters: []
-      requestBody:
-        content:
-          application/json:
-            schema:
-              $ref: '#/components/schemas/EmbeddingsRequest'
-        required: true
  /v1alpha/eval/benchmarks/{benchmark_id}/evaluations:
    post:
      responses:
@ -1033,7 +998,7 @@ paths:
          required: true
          schema:
            type: string
-  /v1/openai/v1/chat/completions/{completion_id}:
+  /v1/chat/completions/{completion_id}:
    get:
      responses:
        '200':
@ -2259,7 +2224,7 @@ paths:
            schema:
              $ref: '#/components/schemas/RegisterBenchmarkRequest'
        required: true
-  /v1/openai/v1/chat/completions:
+  /v1/chat/completions:
    get:
      responses:
        '200':
@ -2452,7 +2417,7 @@ paths:
            schema:
              $ref: '#/components/schemas/RegisterModelRequest'
        required: true
-  /v1/openai/v1/responses/{response_id}/input_items:
+  /v1/responses/{response_id}/input_items:
    get:
      responses:
        '200':
@ -2906,7 +2871,7 @@ paths:
            schema:
              $ref: '#/components/schemas/LogEventRequest'
        required: true
-  /v1/openai/v1/vector_stores/{vector_store_id}/files:
+  /v1/vector_stores/{vector_store_id}/files:
    get:
      responses:
        '200':
@ -3015,7 +2980,7 @@ paths:
            schema:
              $ref: '#/components/schemas/OpenaiAttachFileToVectorStoreRequest'
        required: true
-  /v1/openai/v1/completions:
+  /v1/completions:
    post:
      responses:
        '200':
@ -3049,7 +3014,7 @@ paths:
            schema:
              $ref: '#/components/schemas/OpenaiCompletionRequest'
        required: true
-  /v1/openai/v1/vector_stores:
+  /v1/vector_stores:
    get:
      responses:
        '200':
@ -3136,7 +3101,7 @@ paths:
            schema:
              $ref: '#/components/schemas/OpenaiCreateVectorStoreRequest'
        required: true
-  /v1/openai/v1/files/{file_id}:
+  /v1/files/{file_id}:
    get:
      responses:
        '200':
@ -3201,7 +3166,7 @@ paths:
          required: true
          schema:
            type: string
-  /v1/openai/v1/vector_stores/{vector_store_id}:
+  /v1/vector_stores/{vector_store_id}:
    get:
      responses:
        '200':
@ -3298,7 +3263,7 @@ paths:
          required: true
          schema:
            type: string
-  /v1/openai/v1/vector_stores/{vector_store_id}/files/{file_id}:
+  /v1/vector_stores/{vector_store_id}/files/{file_id}:
    get:
      responses:
        '200':
@ -3416,7 +3381,7 @@ paths:
          required: true
          schema:
            type: string
-  /v1/openai/v1/embeddings:
+  /v1/embeddings:
    post:
      responses:
        '200':
@ -3451,7 +3416,7 @@ paths:
            schema:
              $ref: '#/components/schemas/OpenaiEmbeddingsRequest'
        required: true
-  /v1/openai/v1/files:
+  /v1/files:
    get:
      responses:
        '200':
@ -3544,8 +3509,6 @@ paths:
        - purpose: The intended purpose of the uploaded file.

        - expires_after: Optional form values describing expiration for the file.
-        Expected expires_after[anchor] = "created_at", expires_after[seconds] = {integer}.
-        Seconds must be between 3600 and 2592000 (1 hour to 30 days).
      parameters: []
      requestBody:
        content:
@ -3558,45 +3521,13 @@ paths:
                  format: binary
                purpose:
                  $ref: '#/components/schemas/OpenAIFilePurpose'
-                expires_after_anchor:
-                  oneOf:
-                    - type: string
-                    - type: 'null'
-                expires_after_seconds:
-                  oneOf:
-                    - type: integer
-                    - type: 'null'
+                expires_after:
+                  $ref: '#/components/schemas/ExpiresAfter'
              required:
                - file
                - purpose
-                - expires_after_anchor
-                - expires_after_seconds
        required: true
-  /v1/openai/v1/models:
-    get:
-      responses:
-        '200':
-          description: A OpenAIListModelsResponse.
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/OpenAIListModelsResponse'
-        '400':
-          $ref: '#/components/responses/BadRequest400'
-        '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
-        '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
-        default:
-          $ref: '#/components/responses/DefaultError'
-      tags:
-        - Models
-      summary: List models using the OpenAI API.
-      description: List models using the OpenAI API.
-      parameters: []
-  /v1/openai/v1/files/{file_id}/content:
+  /v1/files/{file_id}/content:
    get:
      responses:
        '200':
@ -3630,7 +3561,7 @@ paths:
          required: true
          schema:
            type: string
-  /v1/openai/v1/vector_stores/{vector_store_id}/files/{file_id}/content:
+  /v1/vector_stores/{vector_store_id}/files/{file_id}/content:
    get:
      responses:
        '200':
@ -3670,7 +3601,7 @@ paths:
          required: true
          schema:
            type: string
-  /v1/openai/v1/vector_stores/{vector_store_id}/search:
+  /v1/vector_stores/{vector_store_id}/search:
    post:
      responses:
        '200':
@ -3930,7 +3861,7 @@ paths:
            schema:
              $ref: '#/components/schemas/QueryTracesRequest'
        required: true
-  /v1/inference/rerank:
+  /v1alpha/inference/rerank:
    post:
      responses:
        '200':
@ -4098,7 +4029,7 @@ paths:
            schema:
              $ref: '#/components/schemas/RunEvalRequest'
        required: true
-  /v1/openai/v1/moderations:
+  /v1/moderations:
    post:
      responses:
        '200':
@ -4656,7 +4587,16 @@ components:
      type: object
      properties:
        strategy:
-          $ref: '#/components/schemas/SamplingStrategy'
+          oneOf:
+            - $ref: '#/components/schemas/GreedySamplingStrategy'
+            - $ref: '#/components/schemas/TopPSamplingStrategy'
+            - $ref: '#/components/schemas/TopKSamplingStrategy'
+          discriminator:
+            propertyName: type
+            mapping:
+              greedy: '#/components/schemas/GreedySamplingStrategy'
+              top_p: '#/components/schemas/TopPSamplingStrategy'
+              top_k: '#/components/schemas/TopKSamplingStrategy'
          description: The sampling strategy.
        max_tokens:
          type: integer
@ -4684,17 +4624,6 @@ components:
        - strategy
      title: SamplingParams
      description: Sampling parameters.
-    SamplingStrategy:
-      oneOf:
-        - $ref: '#/components/schemas/GreedySamplingStrategy'
-        - $ref: '#/components/schemas/TopPSamplingStrategy'
-        - $ref: '#/components/schemas/TopKSamplingStrategy'
-      discriminator:
-        propertyName: type
-        mapping:
-          greedy: '#/components/schemas/GreedySamplingStrategy'
-          top_p: '#/components/schemas/TopPSamplingStrategy'
-          top_k: '#/components/schemas/TopKSamplingStrategy'
    SystemMessage:
      type: object
      properties:
@ -5141,7 +5070,16 @@ components:
            - progress
          description: Type of the event
        delta:
-          $ref: '#/components/schemas/ContentDelta'
+          oneOf:
+            - $ref: '#/components/schemas/TextDelta'
+            - $ref: '#/components/schemas/ImageDelta'
+            - $ref: '#/components/schemas/ToolCallDelta'
+          discriminator:
+            propertyName: type
+            mapping:
+              text: '#/components/schemas/TextDelta'
+              image: '#/components/schemas/ImageDelta'
+              tool_call: '#/components/schemas/ToolCallDelta'
          description: >-
            Content generated since last event. This can be one or more tokens, or
            a tool call.
@ -5184,17 +5122,6 @@ components:
      title: ChatCompletionResponseStreamChunk
      description: >-
        A chunk of a streamed chat completion response.
-    ContentDelta:
-      oneOf:
-        - $ref: '#/components/schemas/TextDelta'
-        - $ref: '#/components/schemas/ImageDelta'
-        - $ref: '#/components/schemas/ToolCallDelta'
-      discriminator:
-        propertyName: type
-        mapping:
-          text: '#/components/schemas/TextDelta'
-          image: '#/components/schemas/ImageDelta'
-          tool_call: '#/components/schemas/ToolCallDelta'
    ImageDelta:
      type: object
      properties:
@ -5876,7 +5803,22 @@ components:
      type: object
      properties:
        payload:
-          $ref: '#/components/schemas/AgentTurnResponseEventPayload'
+          oneOf:
+            - $ref: '#/components/schemas/AgentTurnResponseStepStartPayload'
+            - $ref: '#/components/schemas/AgentTurnResponseStepProgressPayload'
+            - $ref: '#/components/schemas/AgentTurnResponseStepCompletePayload'
+            - $ref: '#/components/schemas/AgentTurnResponseTurnStartPayload'
+            - $ref: '#/components/schemas/AgentTurnResponseTurnCompletePayload'
+            - $ref: '#/components/schemas/AgentTurnResponseTurnAwaitingInputPayload'
+          discriminator:
+            propertyName: event_type
+            mapping:
+              step_start: '#/components/schemas/AgentTurnResponseStepStartPayload'
+              step_progress: '#/components/schemas/AgentTurnResponseStepProgressPayload'
+              step_complete: '#/components/schemas/AgentTurnResponseStepCompletePayload'
+              turn_start: '#/components/schemas/AgentTurnResponseTurnStartPayload'
+              turn_complete: '#/components/schemas/AgentTurnResponseTurnCompletePayload'
+              turn_awaiting_input: '#/components/schemas/AgentTurnResponseTurnAwaitingInputPayload'
          description: >-
            Event-specific payload containing event data
      additionalProperties: false
@ -5885,23 +5827,6 @@ components:
      title: AgentTurnResponseEvent
      description: >-
        An event in an agent turn response stream.
-    AgentTurnResponseEventPayload:
-      oneOf:
-        - $ref: '#/components/schemas/AgentTurnResponseStepStartPayload'
-        - $ref: '#/components/schemas/AgentTurnResponseStepProgressPayload'
-        - $ref: '#/components/schemas/AgentTurnResponseStepCompletePayload'
-        - $ref: '#/components/schemas/AgentTurnResponseTurnStartPayload'
-        - $ref: '#/components/schemas/AgentTurnResponseTurnCompletePayload'
-        - $ref: '#/components/schemas/AgentTurnResponseTurnAwaitingInputPayload'
-      discriminator:
-        propertyName: event_type
-        mapping:
-          step_start: '#/components/schemas/AgentTurnResponseStepStartPayload'
-          step_progress: '#/components/schemas/AgentTurnResponseStepProgressPayload'
-          step_complete: '#/components/schemas/AgentTurnResponseStepCompletePayload'
-          turn_start: '#/components/schemas/AgentTurnResponseTurnStartPayload'
-          turn_complete: '#/components/schemas/AgentTurnResponseTurnCompletePayload'
-          turn_awaiting_input: '#/components/schemas/AgentTurnResponseTurnAwaitingInputPayload'
    AgentTurnResponseStepCompletePayload:
      type: object
      properties:
@ -5980,7 +5905,16 @@ components:
          description: >-
            Unique identifier for the step within a turn
        delta:
-          $ref: '#/components/schemas/ContentDelta'
+          oneOf:
+            - $ref: '#/components/schemas/TextDelta'
+            - $ref: '#/components/schemas/ImageDelta'
+            - $ref: '#/components/schemas/ToolCallDelta'
+          discriminator:
+            propertyName: type
+            mapping:
+              text: '#/components/schemas/TextDelta'
+              image: '#/components/schemas/ImageDelta'
+              tool_call: '#/components/schemas/ToolCallDelta'
          description: >-
            Incremental content changes during step execution
      additionalProperties: false
@ -6968,10 +6902,6 @@ components:
          type: string
          description: >-
            (Optional) Truncation strategy applied to the response
-        user:
-          type: string
-          description: >-
-            (Optional) User identifier associated with the request
      additionalProperties: false
      required:
        - created_at
@ -7104,15 +7034,6 @@ components:
      title: OpenAIResponseOutputMessageMCPListTools
      description: >-
        MCP list tools output message containing available tools from an MCP server.
-    OpenAIResponseContentPart:
-      oneOf:
-        - $ref: '#/components/schemas/OpenAIResponseContentPartOutputText'
-        - $ref: '#/components/schemas/OpenAIResponseContentPartRefusal'
-      discriminator:
-        propertyName: type
-        mapping:
-          output_text: '#/components/schemas/OpenAIResponseContentPartOutputText'
-          refusal: '#/components/schemas/OpenAIResponseContentPartRefusal'
    OpenAIResponseContentPartOutputText:
      type: object
      properties:
@ -7220,7 +7141,14 @@ components:
          description: >-
            Unique identifier of the output item containing this content part
        part:
-          $ref: '#/components/schemas/OpenAIResponseContentPart'
+          oneOf:
+            - $ref: '#/components/schemas/OpenAIResponseContentPartOutputText'
+            - $ref: '#/components/schemas/OpenAIResponseContentPartRefusal'
+          discriminator:
+            propertyName: type
+            mapping:
+              output_text: '#/components/schemas/OpenAIResponseContentPartOutputText'
+              refusal: '#/components/schemas/OpenAIResponseContentPartRefusal'
          description: The content part that was added
        sequence_number:
          type: integer
@ -7255,7 +7183,14 @@ components:
          description: >-
            Unique identifier of the output item containing this content part
        part:
-          $ref: '#/components/schemas/OpenAIResponseContentPart'
+          oneOf:
+            - $ref: '#/components/schemas/OpenAIResponseContentPartOutputText'
+            - $ref: '#/components/schemas/OpenAIResponseContentPartRefusal'
+          discriminator:
+            propertyName: type
+            mapping:
+              output_text: '#/components/schemas/OpenAIResponseContentPartOutputText'
+              refusal: '#/components/schemas/OpenAIResponseContentPartRefusal'
          description: The completed content part
        sequence_number:
          type: integer
@ -7541,7 +7476,22 @@ components:
          description: >-
            Unique identifier of the response containing this output
        item:
-          $ref: '#/components/schemas/OpenAIResponseOutput'
+          oneOf:
+            - $ref: '#/components/schemas/OpenAIResponseMessage'
+            - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
+            - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall'
+            - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
+            - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
+            - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
+          discriminator:
+            propertyName: type
+            mapping:
+              message: '#/components/schemas/OpenAIResponseMessage'
+              web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
+              file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall'
+              function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
+              mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
+              mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
          description: >-
            The output item that was added (message, tool call, etc.)
        output_index:
@ -7577,7 +7527,22 @@ components:
          description: >-
            Unique identifier of the response containing this output
        item:
-          $ref: '#/components/schemas/OpenAIResponseOutput'
+          oneOf:
+            - $ref: '#/components/schemas/OpenAIResponseMessage'
+            - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
+            - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall'
+            - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
+            - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
+            - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
+          discriminator:
+            propertyName: type
+            mapping:
+              message: '#/components/schemas/OpenAIResponseMessage'
+              web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
+              file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall'
+              function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
+              mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
+              mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
          description: >-
            The completed output item (message, tool call, etc.)
        output_index:
@ -7846,72 +7811,6 @@ components:
      title: OpenAIDeleteResponseObject
      description: >-
        Response object confirming deletion of an OpenAI response.
-    EmbeddingsRequest:
-      type: object
-      properties:
-        model_id:
-          type: string
-          description: >-
-            The identifier of the model to use. The model must be an embedding model
-            registered with Llama Stack and available via the /models endpoint.
-        contents:
-          oneOf:
-            - type: array
-              items:
-                type: string
-            - type: array
-              items:
-                $ref: '#/components/schemas/InterleavedContentItem'
-          description: >-
-            List of contents to generate embeddings for. Each content can be a string
-            or an InterleavedContentItem (and hence can be multimodal). The behavior
-            depends on the model and provider. Some models may only support text.
-        text_truncation:
-          type: string
-          enum:
-            - none
-            - start
-            - end
-          description: >-
-            (Optional) Config for how to truncate text for embedding when text is
-            longer than the model's max sequence length.
-        output_dimension:
-          type: integer
-          description: >-
-            (Optional) Output dimensionality for the embeddings. Only supported by
-            Matryoshka models.
-        task_type:
-          type: string
-          enum:
-            - query
-            - document
-          description: >-
-            (Optional) How is the embedding being used? This is only supported by
-            asymmetric embedding models.
-      additionalProperties: false
-      required:
-        - model_id
-        - contents
-      title: EmbeddingsRequest
-    EmbeddingsResponse:
-      type: object
-      properties:
-        embeddings:
-          type: array
-          items:
-            type: array
-            items:
-              type: number
-          description: >-
-            List of embedding vectors, one per input content. Each embedding is a
-            list of floats. The dimensionality of the embedding is model-specific;
-            you can check model metadata using /models/{model_id}
-      additionalProperties: false
-      required:
-        - embeddings
-      title: EmbeddingsResponse
-      description: >-
-        Response containing generated embeddings.
    AgentCandidate:
      type: object
      properties:
@ -7966,7 +7865,14 @@ components:
      type: object
      properties:
        eval_candidate:
-          $ref: '#/components/schemas/EvalCandidate'
+          oneOf:
+            - $ref: '#/components/schemas/ModelCandidate'
+            - $ref: '#/components/schemas/AgentCandidate'
+          discriminator:
+            propertyName: type
+            mapping:
+              model: '#/components/schemas/ModelCandidate'
+              agent: '#/components/schemas/AgentCandidate'
          description: The candidate to evaluate.
        scoring_params:
          type: object
@ -7987,15 +7893,6 @@ components:
      title: BenchmarkConfig
      description: >-
        A benchmark configuration for evaluation.
-    EvalCandidate:
-      oneOf:
-        - $ref: '#/components/schemas/ModelCandidate'
-        - $ref: '#/components/schemas/AgentCandidate'
-      discriminator:
-        propertyName: type
-        mapping:
-          model: '#/components/schemas/ModelCandidate'
-          agent: '#/components/schemas/AgentCandidate'
    LLMAsJudgeScoringFnParams:
      type: object
      properties:
@ -8459,7 +8356,20 @@ components:
      type: object
      properties:
        message:
-          $ref: '#/components/schemas/OpenAIMessageParam'
+          oneOf:
+            - $ref: '#/components/schemas/OpenAIUserMessageParam'
+            - $ref: '#/components/schemas/OpenAISystemMessageParam'
+            - $ref: '#/components/schemas/OpenAIAssistantMessageParam'
+            - $ref: '#/components/schemas/OpenAIToolMessageParam'
+            - $ref: '#/components/schemas/OpenAIDeveloperMessageParam'
+          discriminator:
+            propertyName: role
+            mapping:
+              user: '#/components/schemas/OpenAIUserMessageParam'
+              system: '#/components/schemas/OpenAISystemMessageParam'
+              assistant: '#/components/schemas/OpenAIAssistantMessageParam'
+              tool: '#/components/schemas/OpenAIToolMessageParam'
+              developer: '#/components/schemas/OpenAIDeveloperMessageParam'
          description: The message from the model
        finish_reason:
          type: string
@ -8752,15 +8662,6 @@ components:
        - model
        - input_messages
      title: OpenAICompletionWithInputMessages
-    DataSource:
-      oneOf:
-        - $ref: '#/components/schemas/URIDataSource'
-        - $ref: '#/components/schemas/RowsDataSource'
-      discriminator:
-        propertyName: type
-        mapping:
-          uri: '#/components/schemas/URIDataSource'
-          rows: '#/components/schemas/RowsDataSource'
    Dataset:
      type: object
      properties:
@ -8795,7 +8696,14 @@ components:
          description: >-
            Purpose of the dataset indicating its intended use
        source:
-          $ref: '#/components/schemas/DataSource'
+          oneOf:
+            - $ref: '#/components/schemas/URIDataSource'
+            - $ref: '#/components/schemas/RowsDataSource'
+          discriminator:
+            propertyName: type
+            mapping:
+              uri: '#/components/schemas/URIDataSource'
+              rows: '#/components/schemas/RowsDataSource'
          description: >-
            Data source configuration for the dataset
        metadata:
@ -9041,31 +8949,6 @@ components:
        - type
      title: ObjectType
      description: Parameter type for object values.
-    ParamType:
-      oneOf:
-        - $ref: '#/components/schemas/StringType'
-        - $ref: '#/components/schemas/NumberType'
-        - $ref: '#/components/schemas/BooleanType'
-        - $ref: '#/components/schemas/ArrayType'
-        - $ref: '#/components/schemas/ObjectType'
-        - $ref: '#/components/schemas/JsonType'
-        - $ref: '#/components/schemas/UnionType'
-        - $ref: '#/components/schemas/ChatCompletionInputType'
-        - $ref: '#/components/schemas/CompletionInputType'
-        - $ref: '#/components/schemas/AgentTurnInputType'
-      discriminator:
-        propertyName: type
-        mapping:
-          string: '#/components/schemas/StringType'
-          number: '#/components/schemas/NumberType'
-          boolean: '#/components/schemas/BooleanType'
-          array: '#/components/schemas/ArrayType'
-          object: '#/components/schemas/ObjectType'
-          json: '#/components/schemas/JsonType'
-          union: '#/components/schemas/UnionType'
-          chat_completion_input: '#/components/schemas/ChatCompletionInputType'
-          completion_input: '#/components/schemas/CompletionInputType'
-          agent_turn_input: '#/components/schemas/AgentTurnInputType'
    ScoringFn:
      type: object
      properties:
@ -9104,7 +8987,30 @@ components:
              - type: array
              - type: object
        return_type:
-          $ref: '#/components/schemas/ParamType'
+          oneOf:
+            - $ref: '#/components/schemas/StringType'
+            - $ref: '#/components/schemas/NumberType'
+            - $ref: '#/components/schemas/BooleanType'
+            - $ref: '#/components/schemas/ArrayType'
+            - $ref: '#/components/schemas/ObjectType'
+            - $ref: '#/components/schemas/JsonType'
+            - $ref: '#/components/schemas/UnionType'
+            - $ref: '#/components/schemas/ChatCompletionInputType'
+            - $ref: '#/components/schemas/CompletionInputType'
+            - $ref: '#/components/schemas/AgentTurnInputType'
+          discriminator:
+            propertyName: type
+            mapping:
+              string: '#/components/schemas/StringType'
+              number: '#/components/schemas/NumberType'
+              boolean: '#/components/schemas/BooleanType'
+              array: '#/components/schemas/ArrayType'
+              object: '#/components/schemas/ObjectType'
+              json: '#/components/schemas/JsonType'
+              union: '#/components/schemas/UnionType'
+              chat_completion_input: '#/components/schemas/ChatCompletionInputType'
+              completion_input: '#/components/schemas/CompletionInputType'
+              agent_turn_input: '#/components/schemas/AgentTurnInputType'
        params:
          $ref: '#/components/schemas/ScoringFnParams'
      additionalProperties: false
@ -10234,10 +10140,6 @@ components:
          type: string
          description: >-
            (Optional) Truncation strategy applied to the response
-        user:
-          type: string
-          description: >-
-            (Optional) User identifier associated with the request
        input:
          type: array
          items:
@ -10560,7 +10462,14 @@ components:
          description: >-
            Event type identifier set to STRUCTURED_LOG
        payload:
-          $ref: '#/components/schemas/StructuredLogPayload'
+          oneOf:
+            - $ref: '#/components/schemas/SpanStartPayload'
+            - $ref: '#/components/schemas/SpanEndPayload'
+          discriminator:
+            propertyName: type
+            mapping:
+              span_start: '#/components/schemas/SpanStartPayload'
+              span_end: '#/components/schemas/SpanEndPayload'
          description: >-
            The structured payload data for the log event
      additionalProperties: false
@ -10573,15 +10482,6 @@ components:
      title: StructuredLogEvent
      description: >-
        A structured log event containing typed payload data.
-    StructuredLogPayload:
-      oneOf:
-        - $ref: '#/components/schemas/SpanStartPayload'
-        - $ref: '#/components/schemas/SpanEndPayload'
-      discriminator:
-        propertyName: type
-        mapping:
-          span_start: '#/components/schemas/SpanStartPayload'
-          span_end: '#/components/schemas/SpanEndPayload'
    StructuredLogType:
      type: string
      enum:
@ -10790,7 +10690,14 @@ components:
          description: >-
            Key-value attributes associated with the file
        chunking_strategy:
-          $ref: '#/components/schemas/VectorStoreChunkingStrategy'
+          oneOf:
+            - $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto'
+            - $ref: '#/components/schemas/VectorStoreChunkingStrategyStatic'
+          discriminator:
+            propertyName: type
+            mapping:
+              auto: '#/components/schemas/VectorStoreChunkingStrategyAuto'
+              static: '#/components/schemas/VectorStoreChunkingStrategyStatic'
          description: >-
            Strategy used for splitting the file into chunks
        created_at:
@ -11805,38 +11712,6 @@ components:
      title: VectorStoreListFilesResponse
      description: >-
        Response from listing files in a vector store.
-    OpenAIModel:
-      type: object
-      properties:
-        id:
-          type: string
-        object:
-          type: string
-          const: model
-          default: model
-        created:
-          type: integer
-        owned_by:
-          type: string
-      additionalProperties: false
-      required:
-        - id
-        - object
-        - created
-        - owned_by
-      title: OpenAIModel
-      description: A model from OpenAI.
-    OpenAIListModelsResponse:
-      type: object
-      properties:
-        data:
-          type: array
-          items:
-            $ref: '#/components/schemas/OpenAIModel'
-      additionalProperties: false
-      required:
-        - data
-      title: OpenAIListModelsResponse
    VectorStoreListResponse:
      type: object
      properties:
@ -12102,6 +11977,25 @@ components:
      required:
        - attributes
      title: OpenaiUpdateVectorStoreFileRequest
+    ExpiresAfter:
+      type: object
+      properties:
+        anchor:
+          type: string
+          const: created_at
+        seconds:
+          type: integer
+      additionalProperties: false
+      required:
+        - anchor
+        - seconds
+      title: ExpiresAfter
+      description: >-
+        Control expiration of uploaded files.
+
+        Params:
+         - anchor, must be "created_at"
+         - seconds, must be int between 3600 and 2592000 (1 hour to 30 days)
    DPOAlignmentConfig:
      type: object
      properties:
@ -12387,7 +12281,14 @@ components:
      type: object
      properties:
        query_generator_config:
-          $ref: '#/components/schemas/RAGQueryGeneratorConfig'
+          oneOf:
+            - $ref: '#/components/schemas/DefaultRAGQueryGeneratorConfig'
+            - $ref: '#/components/schemas/LLMRAGQueryGeneratorConfig'
+          discriminator:
+            propertyName: type
+            mapping:
+              default: '#/components/schemas/DefaultRAGQueryGeneratorConfig'
+              llm: '#/components/schemas/LLMRAGQueryGeneratorConfig'
          description: Configuration for the query generator.
        max_tokens_in_context:
          type: integer
@ -12430,15 +12331,6 @@ components:
      title: RAGQueryConfig
      description: >-
        Configuration for the RAG query generation.
-    RAGQueryGeneratorConfig:
-      oneOf:
-        - $ref: '#/components/schemas/DefaultRAGQueryGeneratorConfig'
-        - $ref: '#/components/schemas/LLMRAGQueryGeneratorConfig'
-      discriminator:
-        propertyName: type
-        mapping:
-          default: '#/components/schemas/DefaultRAGQueryGeneratorConfig'
-          llm: '#/components/schemas/LLMRAGQueryGeneratorConfig'
    RAGSearchMode:
      type: string
      enum:
@ -12874,6 +12766,15 @@ components:
        - dataset_id
        - scoring_functions
      title: RegisterBenchmarkRequest
+    DataSource:
+      oneOf:
+        - $ref: '#/components/schemas/URIDataSource'
+        - $ref: '#/components/schemas/RowsDataSource'
+      discriminator:
+        propertyName: type
+        mapping:
+          uri: '#/components/schemas/URIDataSource'
+          rows: '#/components/schemas/RowsDataSource'
    RegisterDatasetRequest:
      type: object
      properties:
@ -12958,6 +12859,31 @@ components:
      required:
        - model_id
      title: RegisterModelRequest
+    ParamType:
+      oneOf:
+        - $ref: '#/components/schemas/StringType'
+        - $ref: '#/components/schemas/NumberType'
+        - $ref: '#/components/schemas/BooleanType'
+        - $ref: '#/components/schemas/ArrayType'
+        - $ref: '#/components/schemas/ObjectType'
+        - $ref: '#/components/schemas/JsonType'
+        - $ref: '#/components/schemas/UnionType'
+        - $ref: '#/components/schemas/ChatCompletionInputType'
+        - $ref: '#/components/schemas/CompletionInputType'
+        - $ref: '#/components/schemas/AgentTurnInputType'
+      discriminator:
+        propertyName: type
+        mapping:
+          string: '#/components/schemas/StringType'
+          number: '#/components/schemas/NumberType'
+          boolean: '#/components/schemas/BooleanType'
+          array: '#/components/schemas/ArrayType'
+          object: '#/components/schemas/ObjectType'
+          json: '#/components/schemas/JsonType'
+          union: '#/components/schemas/UnionType'
+          chat_completion_input: '#/components/schemas/ChatCompletionInputType'
+          completion_input: '#/components/schemas/CompletionInputType'
+          agent_turn_input: '#/components/schemas/AgentTurnInputType'
    RegisterScoringFunctionRequest:
      type: object
      properties: